### Replication code for "Misdemeanor Disenfranchisement?"
### Autumn 2018
### See the readme file for more details on what goes where in this replication package
### Contact Ariel White with questions: arwhi@mit.edu

rm(list=ls())
#setwd("/nfs/projects_nobackup/f/FLvote/Texas/merging") 
library(data.table)
library(AER)
library(stargazer)

# Load the de-identified defendant file; it provides the data.table `voter1`.
load("defendants_voter1_deidentified.Rdata")

# Keep only cases whose sentence is below 65,000 days (as discussed in the
# paper's footnote), then report the resulting dimensions.
voter1 <- voter1[sentencedays < 65000]
dim(voter1)

# Sentence-length distribution: everyone, then only those sentenced to jail.
summary(voter1$sentencedays)
dim(voter1)
summary(voter1[jail == 1]$sentencedays)

# Courtroom-by-filing-year jailing rate (characterized later in the script as
# the main instrument).
voter1[, crtjailavg1 := mean(jail, na.rm = TRUE), by = .(crt, fyear)]

## Summary tables and other small descriptive facts.
# Age at filing expressed in years (ageatfile is divided by 365).
voter1[, yearsatfile := as.numeric(ageatfile) / 365]

# Table 1 in paper: means and SDs of the sentencing outcomes.
stargazer(
  subset(
    voter1,
    select = c("anyconv", "fine", "probation", "jail", "sentencedays",
               "sent1plus", "sent1mplus")
  ),
  covariate.labels = c("Conviction", "Fine", "Probation", "Jail",
                       "Total Sentence Length (Days)", 'Sentence  $>$ 1year',
                       "Sentence $>$ 1month"),
  label = "defendantsents",
  out = "defendantsents.tex",
  title = "Criminal Sentencing, 2009-2012",
  summary.stat = c("mean", "sd"),
  digits = 2
)

# Quick look at case dispositions: what can be said about pleas, dismissals
# and trials, and how do courtroom-year rates of each relate to jailing rates?

# Guilty pleas: 1 when the disposition text contains "PLEA OF GUILTY".
voter1[, pleadg := as.numeric(grepl("PLEA OF GUILTY", disposition))]
courtpleas <- voter1[, .(pleadprop = mean(pleadg, na.rm = TRUE)), by = .(crt, fyear)]
courtpleas
voter1[, crtpleadavg1 := mean(pleadg, na.rm = TRUE), by = .(crt, fyear)]
with(voter1, cor(crtjailavg1, crtpleadavg1))
table(voter1$disposition)

# Dismissals: 1 for the two dismissal disposition codes.
voter1[, dismissed := as.numeric(disposition %in% c("DISMISSED", "DISM OTHER"))]
courtdism <- voter1[, .(dismprop = mean(dismissed, na.rm = TRUE)), by = .(crt, fyear)]
voter1[, crtdismavg1 := mean(dismissed, na.rm = TRUE), by = .(crt, fyear)]
with(voter1, cor(crtjailavg1, crtdismavg1))

# Trials: 1 when the disposition text mentions "JURY" or "TRIAL".
voter1[, gotrial := as.numeric(grepl("JURY|TRIAL", disposition))]
voter1[, crttrialavg1 := mean(gotrial, na.rm = TRUE), by = .(crt, fyear)]
with(voter1, cor(crtjailavg1, crttrialavg1))
sum(voter1$gotrial)
summary(voter1$crttrialavg1)

# Table A13 in SI: plea, dismissal and trial shares for each courtroom
# (pooled over filing years).
courtplearates <- voter1[
  ,
  .(
    pleadprop = mean(pleadg, na.rm = TRUE),
    dismprop = mean(dismissed, na.rm = TRUE),
    trialprop = mean(gotrial, na.rm = TRUE)
  ),
  by = .(crt)
]
setkeyv(courtplearates, "crt")
stargazer(
  courtplearates,
  summary = FALSE,
  covariate.labels = c("Courtroom", "Pled Guilty", "Dismissed",  "Trial"),
  title = "Misdemeanor Case Dispositions Across Courtrooms, 2008-2012",
  label = "casedisps",
  out = "casedispositions_pleas.tex",
  rownames = FALSE
)

# Other descriptive figures quoted in the paper text.
median(voter1$sentencedays)
median(voter1[sentencedays > 0, sentencedays]) # median conditional on getting some sentence
mean(voter1$vote2012)
mean(voter1$registered)

# How does a courtroom's jailing behavior compare with its fining, probation
# and conviction behavior? (The crt*avg1 / crtconvrate1 columns are not created
# in this script; they are read from the loaded data.)
summary(voter1$crtjailavg1)
with(voter1, cor(crtjailavg1, crtfineavg1))
with(voter1, cor(crtjailavg1, crtprobationavg1))
with(voter1, cor(crtjailavg1, crtconvrate1))

## Characterize the main instrument.
summary(voter1$crtjailavg1)
table(voter1$crtjailavg1)
dim(table(voter1$crtjailavg1))
hist(voter1$crtjailavg1, breaks = 59)

# One row per distinct (filing year, courtroom, jailing rate), keyed by courtroom.
courtrooms <- unique(voter1[, .(fyear, crt, crtjailavg1)])
setkey(courtrooms, crt)

## Naive OLS: is a jail sentence associated with lower turnout? (Table 2 in paper)
v1 <- lm(vote2012 ~ jail, data = voter1)
v1a <- lm(vote2012 ~ jail + as.numeric(voterYOB) + black + male, data = voter1)
v1b <- lm(vote2012 ~ jail + as.numeric(voterYOB) + black + male + jail*black, data = voter1)
stargazer(
  v1, v1a, v1b,
  label = "naiveols",
  align = TRUE,
  omit.stat = c("LL", "ser", "f"),
  title = "OLS estimates of jail's effect on voting",
  covariate.labels = c("Jail", "Voter Birth Year", "Black", "Male", "Jail*Black"),
  dep.var.labels = "Voted 2012",
  out = "naiveols.tex",
  star.cutoffs = c(0.05),
  notes = "$^{*}$p$<$0.05",
  notes.append = FALSE
)

# Reviewer question: same model with geographic (zip code) fixed effects.
# This takes a little while to run.
v1.zip <- lm(vote2012 ~ jail + as.numeric(voterYOB) + black + male + as.factor(def_zip), data = voter1)
summary(v1.zip)

# Reviewer request: also control for past turnout (not trusted, for reasons
# discussed in the SI). vote2008 is 1 when vh08g1 is non-missing and positive.
voter1[, vote2008 := as.numeric(!is.na(vh08g1) & vh08g1 > 0)]
v1a.t <- lm(vote2012 ~ jail + as.numeric(voterYOB) + black + male + vote2008, data = voter1)
summary(v1a.t)
v1a.t.zip <- lm(vote2012 ~ jail + as.numeric(voterYOB) + black + male + vote2008 + as.factor(def_zip), data = voter1)
summary(v1a.t.zip)

# Table A7 in SI
stargazer(
  v1a.t.zip, v1.zip, v1a.t, v1a, v1,
  label = "naiveolsFEs",
  align = TRUE,
  omit.stat = c("LL", "ser", "f"),
  omit = "def_zip",
  omit.labels = "Zip Code Fixed Effects",
  title = "OLS estimates of jail's effect on voting",
  covariate.labels = c("Jail", "Voter Birth Year", "Black", "Male", "Voted 2008"),
  dep.var.labels = "Voted 2012",
  out = "naiveolszips.tex",
  star.cutoffs = c(0.05),
  notes = "$^{*}$p$<$0.05",
  notes.append = FALSE
)


#############################################################################
## Main IV analysis
library(AER)

# Courtroom indicator columns (crt_<id>), used later for the courtroom-dummy
# instrument specification.
inds <- unique(voter1$crt) # 15 courtrooms
voter1[, (paste0("crt_", inds)) := lapply(inds, function(court) crt == court)]

# Simplest cut: all defendants, courtroom-year jailing rate as the instrument.
fs2 <- lm(jail ~ crtjailavg1 + fyear, data = voter1)
summary(fs2)
iv2 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1 + fyear, data = voter1)
summary(iv2)

# Table 3 in paper
stargazer(
  fs2, iv2,
  omit = c("fyear"),
  omit.labels = c("Year dummies"),
  label = "firststageIV",
  out = "firststageIV_year.tex",
  title = "Jail Sentences on 2012 Voting",
  omit.stat = c("rsq", "ser"),
  dep.var.labels = c("Jail", "Voted 2012"),
  covariate.labels = c("Court Jail Average (Yr)", "Jail"),
  model.names = FALSE,
  star.cutoffs = c(0.05),
  notes = "$^{*}$p$<$0.05",
  notes.append = FALSE
)

# By race: jail x black interaction, instrumented with instrument x black.
iv2.i <- ivreg(
  vote2012 ~ jail + fyear + black + jail*black |
    crtjailavg1 + fyear + black + crtjailavg1*black,
  data = voter1
)
summary(iv2.i)
# NOTE(review): this first stage fully interacts instrument, year and race,
# which is not the same regressor set as the instrument side of iv2.i above;
# confirm this is the F-statistic intended for the table.
fs2.i <- lm(jail ~ crtjailavg1*fyear*black, data = voter1)

# Table A23 in SI
stargazer(
  iv2.i,
  omit.stat = c("rsq", "ser"),
  omit = c("fyear"),
  omit.labels = c("Year dummies"),
  covariate.labels = c("Jail", "Black", "Jail x Black"),
  label = "raceinteractionIV",
  out = "SI_raceinteractionIV.tex",
  title = "Jail's Effect on Voting (Racial Interaction)",
  add.lines = list(
    c("First Stage F-Statistic", round(summary(fs2.i)$fstatistic[1], digits = 2))
  )
)

## Concern about racial non-monotonicity: split the data by race and rebuild
## the instrument within each race (this does not make a huge difference).

# classA flags cases with mostsevcharge == 5 (class A vs. B misdemeanor: a
# different maximum sentence and severity); used as a covariate later.
voter1[, classA := 0]
voter1[mostsevcharge == 5, classA := 1]

black <- voter1[def_rac == "B"]
white <- voter1[def_rac == "W"]

# Race-specific courtroom-year jailing rates.
black[, crtjailavg1u := mean(jail, na.rm = TRUE), by = .(crt, fyear)]
white[, crtjailavg1u := mean(jail, na.rm = TRUE), by = .(crt, fyear)]

fs2b <- lm(jail ~ crtjailavg1u + fyear, data = black)
summary(fs2b)
iv2b <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = black)
summary(iv2b)

# Pieces for a partial Wald test: first stage with and without the instrument.
fs2b <- lm(jail ~ crtjailavg1u + fyear, data = black)
summary(fs2b)
fs2b1 <- lm(jail ~ fyear, data = black)
summary(fs2b1)

iv2b <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = black)
summary(iv2b)

# Standard errors clustered by courtroom.
library(multiwayvcov)

vcov_court <- cluster.vcov(iv2b, black$crt)
iv2b.c <- coeftest(iv2b, vcov_court)

fs2w <- lm(jail ~ crtjailavg1u + fyear, data = white)
summary(fs2w)
iv2w <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = white)
summary(iv2w)
vcov_court <- cluster.vcov(iv2w, white$crt)
iv2w.c <- coeftest(iv2w, vcov_court)

# Table A1 in SI
stargazer(
  iv2b, iv2w,
  omit = c("fyear"),
  se = list(iv2b.c[, "Std. Error"], iv2w.c[, "Std. Error"]),
  omit.labels = c("Year dummies"),
  add.lines = list(
    c("Clustered SE's", "Courtroom", "Courtroom"),
    c("First Stage F-Statistic",
      round(summary(fs2b)$fstatistic[1], digits = 2),
      round(summary(fs2w)$fstatistic[1], digits = 2))
  ),
  label = "simplestraceIV",
  out = "simplestraceIV_cluster.tex",
  title = "IV estimates: Jail sentences on voting, by race",
  omit.stat = c("rsq", "ser"),
  dep.var.labels = "Voted 2012",
  column.labels = c("Black Defendants", "White Defendants")
)

##############################################################################
# Same analysis using courtroom dummies (interacted with filing year) as the
# instruments, for the SI. Note that fs2b / iv2b are overwritten here with the
# courtroom-dummy versions fitted on Black defendants.
fs2a <- lm(
  jail ~ (crt_2 + crt_3 + crt_4 + crt_5 + crt_6 + crt_7 + crt_8 + crt_9 +
            crt_10 + crt_11 + crt_12 + crt_13 + crt_14 + crt_15)*fyear,
  data = voter1
)
summary(fs2a)
iv2a <- ivreg(
  vote2012 ~ jail + fyear |
    (crt_2 + crt_3 + crt_4 + crt_5 + crt_6 + crt_7 + crt_8 + crt_9 +
       crt_10 + crt_11 + crt_12 + crt_13 + crt_14 + crt_15)*fyear,
  data = voter1
)
summary(iv2a)

fs2b <- lm(
  jail ~ (crt_2 + crt_3 + crt_4 + crt_5 + crt_6 + crt_7 + crt_8 + crt_9 +
            crt_10 + crt_11 + crt_12 + crt_13 + crt_14 + crt_15)*fyear,
  data = black
)
summary(fs2b)
iv2b <- ivreg(
  vote2012 ~ jail + fyear |
    (crt_2 + crt_3 + crt_4 + crt_5 + crt_6 + crt_7 + crt_8 + crt_9 +
       crt_10 + crt_11 + crt_12 + crt_13 + crt_14 + crt_15)*fyear,
  data = black
)
summary(iv2b, diagnostics = TRUE)

# Table A28 in SI. omit.yes.no is set by hand because stargazer does not
# automatically report the omitted dummies for this table.
stargazer(
  iv2a, iv2b,
  omit = c("fyear", "crt"),
  omit.labels = c("Year dummies", "Courtroom Dummies"),
  label = "courtroomdummiesSI",
  out = "courtroomdummiesSI.tex",
  title = "Jail Sentences on 2012 Voting",
  omit.stat = c("rsq", "ser"),
  covariate.labels = c("Jail"),
  model.names = FALSE,
  star.cutoffs = c(0.05),
  notes = "$^{*}$p$<$0.05",
  notes.append = FALSE,
  column.labels = c("All", "Black"),
  omit.yes.no = c("Yes", "Yes")
)

# Exclusion-restriction check: instrument for jail AND conviction at the same
# time (noisy), using the courtroom conviction rate and courtroom jailing rate.
iv2.both.all <- ivreg(vote2012 ~ anyconv + jail + fyear | crtconvrate1 + crtjailavg1 + fyear, data = voter1)
summary(iv2.both.all)
iv2.both.black <- ivreg(vote2012 ~ anyconv + jail + fyear | crtconvrate1 + crtjailavg1 + fyear, data = voter1[def_rac == "B", ])
summary(iv2.both.black)

# Courtroom-year lookup (15 courtrooms x 5 filing years). Built with typed
# columns instead of cbind(), which would coerce everything to character (and
# to factors under R < 4.0).
courtroomyears <- 1:75 # 15*5
crtyears <- data.frame(
  crt = rep(1:15, times = 5),
  fyear = as.character(rep(2008:2012, each = 15)),
  crtyrnum = courtroomyears,
  stringsAsFactors = FALSE
)

# merge() reorders rows, so each model is refitted on the merged frame to keep
# the cluster vector aligned with the model's observations.
allclust <- merge(as.data.frame(voter1), crtyears, by = c("crt", "fyear"), all.x = TRUE)
dim(allclust); dim(voter1)
iv2.both.all <- ivreg(vote2012 ~ anyconv + jail + fyear | crtconvrate1 + crtjailavg1 + fyear, data = allclust)
summary(iv2.both.all)
vcov_court <- cluster.vcov(iv2.both.all, allclust$crt)
iv2.both.all.c <- coeftest(iv2.both.all, vcov_court)

# `black` (rather than a fresh subset of voter1) is merged so that the
# race-specific instrument crtjailavg1u is available in blackclust later on.
# The earlier duplicate merge of voter1[def_rac == "B", ] was immediately
# overwritten and has been dropped.
blackclust <- merge(as.data.frame(black), crtyears, by = c("crt", "fyear"), all.x = TRUE)
iv2.both.black <- ivreg(vote2012 ~ anyconv + jail + fyear | crtconvrate1 + crtjailavg1 + fyear, data = blackclust)
summary(iv2.both.black)
vcov_court <- cluster.vcov(iv2.both.black, blackclust$crt)
iv2.both.black.c <- coeftest(iv2.both.black, vcov_court)

# First stages for the jail equation on the SAME samples as the table columns.
# Previously the table reported F-statistics from fs2b (Black, courtroom-dummy
# first stage) and fs2w (white defendants), which match neither column.
# NOTE(review): with two endogenous regressors there is also a first stage for
# anyconv; the jail first stage is reported here to follow the script's
# convention of reporting summary(lm)$fstatistic -- confirm this is the
# statistic wanted for Table A30.
fs.both.all <- lm(jail ~ crtconvrate1 + crtjailavg1 + fyear, data = allclust)
fs.both.black <- lm(jail ~ crtconvrate1 + crtjailavg1 + fyear, data = blackclust)

# Table A30 in SI
stargazer(
  iv2.both.all, iv2.both.black,
  omit = c("fyear"),
  se = list(iv2.both.all.c[, "Std. Error"], iv2.both.black.c[, "Std. Error"]),
  omit.labels = c("Year dummies"),
  covariate.labels = c("Misdemeanor conviction", "Jail sentence"),
  add.lines = list(
    c("Clustered SE's", "Courtroom", "Courtroom"),
    c("First Stage F-Statistic",
      round(summary(fs.both.all)$fstatistic[1], digits = 2),
      round(summary(fs.both.black)$fstatistic[1], digits = 2))
  ),
  label = "instrumentingforboth",
  out = "jailconvIV_cluster.tex",
  title = "IV estimates: Jail sentences and Conviction on voting",
  omit.stat = c("rsq", "ser"),
  dep.var.labels = "Voted 2012",
  column.labels = c("All Defendants", "Black Defendants")
)

## Any indication of non-monotonicity by race?
## Start by ordering courtrooms (within each year) by their within-race
## jailing rate, to see whether the ordering switches between races.
crts_b <- unique(black[, .(fyear, crt, crtjailavg1u)])
crts_w <- unique(white[, .(fyear, crt, crtjailavg1u)])
setkeyv(crts_b, c("fyear", "crtjailavg1u"))
setkeyv(crts_w, c("fyear", "crtjailavg1u"))
both <- cbind(crts_b, crts_w) # so they do look different

## Reduced-form table: turnout regressed directly on the instrument.
rf2b <- lm(vote2012 ~ crtjailavg1u + fyear, data = blackclust)
summary(rf2b)
rf2 <- lm(vote2012 ~ crtjailavg1 + fyear, data = allclust)
summary(rf2)

# Table A29 in SI
stargazer(
  rf2, rf2b,
  omit = c("fyear"),
  omit.labels = c("Year dummies"),
  label = "reducedform",
  out = "reducedform_b.tex",
  title = "Reduced-form: Courtroom assignment on voting",
  omit.stat = c("adj.rsq", "rsq", "ser"),
  float.env = "sidewaystable",
  covariate.labels = c("Courtroom Instrument", "Courtroom Instrument"),
  dep.var.labels = "Voted 2012",
  column.labels = c("All Defendants", "Black Defendants"),
  star.cutoffs = c(0.05),
  notes = "$^{*}$p$<$0.05",
  notes.append = FALSE
)

################################################
### Robustness checks for the SI
################################################
# Each table below contains two IV models only, so the displayed coefficients
# are `jail` and the constant. The labels previously passed were
# c("Courtroom instrument", "Jail"), which labelled the jail coefficient as
# "Courtroom instrument" and pushed "Jail" onto the constant. They are now
# c("Jail"), matching the other IV-only tables in this script.

# Defendants with only one misdemeanor case.
onecase_b <- black[numcases == 1, ]
dim(black); dim(onecase_b)
fs2b_1 <- lm(jail ~ crtjailavg1u + fyear, data = onecase_b); summary(fs2b_1)
iv2b_1 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = onecase_b); summary(iv2b_1)
onecase <- voter1[numcases == 1, ]
dim(voter1); dim(onecase)
fs2_1 <- lm(jail ~ crtjailavg1 + fyear, data = onecase); summary(fs2_1)
iv2_1 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1 + fyear, data = onecase); summary(iv2_1)

# Table A15 in SI
stargazer(
  iv2_1, iv2b_1,
  omit = c("fyear"),
  omit.labels = c("Year dummies"),
  label = "onecaseIV",
  out = "SI_onecaseIV.tex",
  title = "IV estimates: Jail sentences on voting, Defendants with only one misdemeanor case",
  omit.stat = c("rsq", "ser"),
  covariate.labels = c("Jail"),
  column.labels = c("All", "Black")
)

# Defendants aged 18 to 60 at filing (drops people obviously too young to
# vote, or with really bad data). ageatfile is compared against 18*365 and
# 60*365.
medage_b <- black[ageatfile >= (18 * 365) & ageatfile <= (60 * 365), ]
dim(black); dim(medage_b)
fs2b_1 <- lm(jail ~ crtjailavg1u + fyear, data = medage_b); summary(fs2b_1)
iv2b_1 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = medage_b); summary(iv2b_1)
medage <- voter1[ageatfile >= (18 * 365) & ageatfile <= (60 * 365), ]
dim(voter1); dim(medage)
fs2_1a <- lm(jail ~ crtjailavg1 + fyear, data = medage); summary(fs2_1a)
iv2_1a <- ivreg(vote2012 ~ jail + fyear | crtjailavg1 + fyear, data = medage); summary(iv2_1a)

# Table A14 in SI
stargazer(
  iv2_1a, iv2b_1,
  omit = c("fyear"),
  omit.labels = c("Year dummies"),
  label = "agelimitsIV",
  out = "SI_agelimitsIV.tex",
  title = "IV estimates: Jail sentences on voting, Defendants ages 18-60 only",
  omit.stat = c("rsq", "ser"),
  covariate.labels = c("Jail"),
  column.labels = c("All", "Black")
)

### Men only.
men_b <- black[male == 1, ]
dim(black); dim(men_b)
fs2b_m1 <- lm(jail ~ crtjailavg1u + fyear, data = men_b); summary(fs2b_m1)
iv2b_m1 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = men_b); summary(iv2b_m1)

men <- voter1[male == 1, ]
dim(voter1); dim(men)
fs2_m1 <- lm(jail ~ crtjailavg1 + fyear, data = men); summary(fs2_m1)
iv2_m1 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1 + fyear, data = men); summary(iv2_m1)

# Table A16 in SI
stargazer(
  iv2_m1, iv2b_m1,
  omit = c("fyear"),
  omit.labels = c("Year dummies"),
  label = "blackmenIV",
  out = "SI_blackmenIV.tex",
  title = "IV estimates: Jail sentences on voting, Male defendants only",
  omit.stat = c("rsq", "ser"),
  covariate.labels = c("Jail"),
  column.labels = c("All", "Black")
)

### Add covariates to the IV models.
# Baseline within-race models, refitted (fs2b / iv2b were overwritten by the
# courtroom-dummy versions earlier in the script).
fs2b <- lm(jail ~ crtjailavg1u + fyear, data = black)
summary(fs2b)
iv2b <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = black)
summary(iv2b)

# + sex and most severe charge
fs2b.cov1 <- lm(jail ~ crtjailavg1u + fyear + male + mostsevcharge, data = black)
summary(fs2b.cov1)
iv2b.cov1 <- ivreg(
  vote2012 ~ jail + fyear + male + mostsevcharge |
    crtjailavg1u + fyear + male + mostsevcharge,
  data = black
)
summary(iv2b.cov1)

# + age at filing
fs2b.cov2 <- lm(jail ~ crtjailavg1u + fyear + male + mostsevcharge + ageatfile, data = black)
summary(fs2b.cov2)
iv2b.cov2 <- ivreg(
  vote2012 ~ jail + fyear + male + mostsevcharge + ageatfile |
    crtjailavg1u + fyear + male + mostsevcharge + ageatfile,
  data = black
)
summary(iv2b.cov2)

# Day of week and month of the case filing date (not the appearance date).
voter1[, dayofweek := weekdays(casedate)]
black[, dayofweek := weekdays(casedate)]
voter1[, mon := format(casedate, "%b")]
black[, mon := format(casedate, "%b")]

# + day-of-week and month
fs2b.cov3 <- lm(jail ~ crtjailavg1u + fyear + male + mostsevcharge + ageatfile + dayofweek + mon, data = black)
summary(fs2b.cov3)
iv2b.cov3 <- ivreg(
  vote2012 ~ jail + fyear + male + mostsevcharge + ageatfile + dayofweek + mon |
    crtjailavg1u + fyear + male + mostsevcharge + ageatfile + dayofweek + mon,
  data = black
)
summary(iv2b.cov3)

# Everyone
iv2.cov3 <- ivreg(
  vote2012 ~ jail + fyear + male + mostsevcharge + ageatfile + dayofweek + mon |
    crtjailavg1 + fyear + male + mostsevcharge + ageatfile + dayofweek + mon,
  data = voter1
)
summary(iv2.cov3)

# Everyone, including zip code fixed effects
iv2.cov4 <- ivreg(
  vote2012 ~ jail + fyear + male + mostsevcharge + ageatfile + dayofweek + mon + as.factor(def_zip) |
    crtjailavg1 + fyear + male + mostsevcharge + ageatfile + dayofweek + mon + as.factor(def_zip),
  data = voter1
)
summary(iv2.cov4)

# Black defendants, including zip code fixed effects
iv2b.cov4 <- ivreg(
  vote2012 ~ jail + fyear + male + mostsevcharge + ageatfile + dayofweek + mon + as.factor(def_zip) |
    crtjailavg1u + fyear + male + mostsevcharge + ageatfile + dayofweek + mon + as.factor(def_zip),
  data = black
)
summary(iv2b.cov4)

# Table A17 in SI
stargazer(
  iv2b.cov4, iv2b.cov3, iv2.cov4, iv2.cov3,
  omit = c("fyear", "dayofweek", "mon", "def_zip"),
  omit.labels = c("Year dummies", "Day-of-week dummies", "Month dummies", "Zip code dummies"),
  label = "covarIV",
  out = "SI_covarIV.tex",
  title = "IV estimates: Jail sentences on voting, adding covariates",
  omit.stat = c("rsq", "ser"),
  covariate.labels = c("Jail"),
  column.labels = c("Black", "Black", "All", "All")
)


## The (odd, post-treatment, but frequently requested) robustness check: the
## main analysis restricted to registered voters (people found in the 2014
## voter file).
# Note: fs2_m1 / iv2_m1 / fs2b_m1 / iv2b_m1 reuse names from the men-only
# analysis and overwrite those objects.
registered <- voter1[registered == 1]
dim(voter1)
dim(registered)
fs2_m1 <- lm(jail ~ crtjailavg1 + fyear, data = registered)
summary(fs2_m1)
iv2_m1 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1 + fyear, data = registered)
summary(iv2_m1) # bigger but of course noisier

registered_b <- black[registered == 1]
dim(black)
dim(registered_b)
fs2b_m1 <- lm(jail ~ crtjailavg1u + fyear, data = registered_b)
summary(fs2b_m1)
iv2b_m1 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = registered_b)
summary(iv2b_m1) # a little larger, a little noisier

registered_w <- white[registered == 1]
dim(white)
dim(registered_w)
fs2w_m1 <- lm(jail ~ crtjailavg1u + fyear, data = registered_w)
summary(fs2w_m1)
iv2w_m1 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = registered_w)
summary(iv2w_m1)

# Table A18 in SI
stargazer(
  iv2w_m1, iv2b_m1,
  omit = c("fyear"),
  omit.labels = c("Year dummies"),
  label = "blackwhiteregIV",
  out = "SI_blackwhiteregIV.tex",
  title = "IV estimates: Jail sentences on voting, Registered voters only",
  omit.stat = c("rsq", "ser"),
  column.labels = c("White", "Black"),
  covariate.labels = c("Jail", "Constant")
)


## Same tables for 2008 voters (another subset defined on a post-treatment
## variable; grouped with the registered-voter check in the SI).
# vote2008 is 1 when vh08g1 is non-missing and positive, 0 otherwise.
voter1[, vote2008 := as.numeric(!is.na(vh08g1) & vh08g1 > 0)]
black[, vote2008 := as.numeric(!is.na(vh08g1) & vh08g1 > 0)]

voters08 <- voter1[vote2008 == 1]
voters08b <- black[vote2008 == 1]

# The subsets are getting a little small.
dim(voter1)
dim(voters08)
dim(black)
dim(voters08b)

fs2_v1 <- lm(jail ~ crtjailavg1 + fyear, data = voters08)
summary(fs2_v1)
iv2_v1 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1 + fyear, data = voters08)
summary(iv2_v1) # bigger but of course noisier

fs2b_v1 <- lm(jail ~ crtjailavg1u + fyear, data = voters08b)
summary(fs2b_v1)
iv2b_v1 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = voters08b)
summary(iv2b_v1) # a little larger, a little noisier

# Table A20 in SI (first stage and 2SLS, Black defendants)
stargazer(
  fs2b_v1, iv2b_v1,
  omit = c("fyear"),
  omit.labels = c("Year dummies"),
  label = "blackvotersIV",
  out = "SI_blackvotersIV.tex",
  title = "IV estimates: Jail sentences on voting, Black defendants with recorded 2008 turnout",
  omit.stat = c("rsq", "ser"),
  covariate.labels = c("Courtroom instrument", "Jail")
)

# Table A19 in SI (first stage and 2SLS, all defendants)
stargazer(
  fs2_v1, iv2_v1,
  omit = c("fyear"),
  omit.labels = c("Year dummies"),
  label = "allvotersIV",
  out = "SI_allvotersIV.tex",
  title = "IV estimates: Jail sentences on voting, All defendants with recorded 2008 turnout",
  omit.stat = c("rsq", "ser"),
  covariate.labels = c("Courtroom instrument", "Jail")
)


####
# Continuous sentence length as the treatment. (Do we really expect one more
# day of jail to matter? Qualitative evidence suggests the first few days are
# the big deal.)

# Black defendants: logged sentence length (0.01 added so zero-day sentences
# are defined), instrumented with its courtroom-year mean.
black[, logdays := log(sentencedays + .01)]
hist(black$logdays)

black[, crtjailavglogdays := mean(logdays, na.rm = TRUE), by = list(crt, fyear)]
fs2b_d <- lm(logdays ~ crtjailavglogdays + fyear, data = black); summary(fs2b_d)
iv2b_d <- ivreg(vote2012 ~ logdays + fyear | crtjailavglogdays + fyear, data = black); summary(iv2b_d) # still there; noisier

# All defendants: RAW sentence length in days (not logged), instrumented with
# its courtroom-year mean.
voter1[, crtjailavgdays := mean(sentencedays, na.rm = TRUE), by = list(crt, fyear)]
fs2_d <- lm(sentencedays ~ crtjailavgdays + fyear, data = voter1); summary(fs2_d)
iv2_d <- ivreg(vote2012 ~ sentencedays + fyear | crtjailavgdays + fyear, data = voter1); summary(iv2_d) # small & noisy

# Tables A21 and A22 in SI.
# The all-defendants table was previously labelled "Sentence Length (days,
# logged)" although the model uses unlogged sentencedays; the label now
# describes the variable actually used.
# NOTE(review): if the all-defendants model was meant to use logged days like
# the Black-defendants model, the model rather than the label should change.
stargazer(
  fs2_d, iv2_d,
  omit = c("fyear"),
  omit.labels = c("Year dummies"),
  label = "sentencelengthIV",
  out = "SI_sentencelengthIV.tex",
  title = "IV estimates: Jail sentence length on voting, All defendants",
  omit.stat = c("rsq", "ser"),
  covariate.labels = c("Courtroom instrument", "Sentence Length (days)"),
  column.labels = c("First Stage", "2SLS")
)
stargazer(
  fs2b_d, iv2b_d,
  omit = c("fyear"),
  omit.labels = c("Year dummies"),
  label = "sentencelengthIV_B",
  out = "SI_sentencelengthIV_B.tex",
  title = "IV estimates: Jail sentence length on voting, Black defendants",
  omit.stat = c("rsq", "ser"),
  covariate.labels = c("Courtroom instrument", "Sentence Length (days, logged)"),
  column.labels = c("First Stage", "2SLS")
)

######################
# Mechanisms.
# Do people jailed in the first case (compliers) become more likely to be
# rearrested, go back to jail, or end up with a felony conviction by 2012?
iv.rearrest.all <- ivreg(jailsentences ~ jail + fyear | crtjailavg1 + fyear, data = voter1)
summary(iv.rearrest.all) # no?
iv.felony.all <- ivreg(felonyconvictions ~ jail + fyear | crtjailavg1 + fyear, data = voter1)
summary(iv.felony.all)

fs.felony.all <- lm(jail ~ crtjailavg1 + fyear, data = voter1)
summary(fs.felony.all)
iv.rearrest.black <- ivreg(jailsentences ~ jail + fyear | crtjailavg1u + fyear, data = black)
summary(iv.rearrest.black)
iv.felony.black <- ivreg(felonyconvictions ~ jail + fyear | crtjailavg1u + fyear, data = black)
summary(iv.felony.black)

# Nothing here. Is the time window just too short?
table(voter1$jailsentences)
table(voter1$felonyconvictions)
# Share of defendants in the first (lowest) category of each count.
table(voter1$jailsentences)[1] / nrow(voter1)
table(voter1$felonyconvictions)[1] / nrow(voter1)
# The vast majority are not sentenced to jail again before 2012, nor convicted
# of a felony (or several).

# Table A5 in SI
stargazer(
  iv.rearrest.black, iv.felony.black,
  omit = c("fyear"),
  omit.labels = c("Year dummies"),
  label = "rearrestIV",
  out = "SI_rearrestfelony.tex",
  title = "IV estimates: Jail sentence on new jail sentence/felony conviction, Black defendants",
  omit.stat = c("rsq", "ser"),
  covariate.labels = c("Jail"),
  dep.var.labels = c("More Jail", "Felony Conviction")
)

# Per R1's question: main results after dropping anyone with a later jail
# sentence or felony conviction (a post-treatment restriction).
iv2b.dropconv <- ivreg(
  vote2012 ~ jail + fyear | crtjailavg1u + fyear,
  data = black[!(felonyconvictions > 0 | jailsentences > 0)]
)
summary(iv2b.dropconv, diagnostics = TRUE)
iv2.dropconv <- ivreg(
  vote2012 ~ jail + fyear | crtjailavg1 + fyear,
  data = voter1[!(felonyconvictions > 0 | jailsentences > 0)]
)
summary(iv2.dropconv, diagnostics = TRUE)

# Table A6 in SI
stargazer(
  iv2.dropconv, iv2b.dropconv,
  omit = c("fyear"),
  omit.labels = c("Year dummies"),
  label = "droprearrestIV",
  out = "SI_droprearrestfelony.tex",
  title = "Main IV estimates, dropping people with new jail sentence/felony conviction ",
  omit.stat = c("rsq", "ser"),
  covariate.labels = c("Jail"),
  dep.var.labels = c("Voted2012"),
  column.labels = c("All Defendants", "Black Defendants")
)

################
## All cases EXCEPT marijuana possession (in case of concern about the slight
## imbalance seen in the scatterplots).
notpot <- voter1[com_off_lit_1 != "POSS MARIJ 0-2 OZ"]
dim(notpot)
fs2d_1 <- lm(jail ~ crtjailavg1 + fyear, data = notpot)
summary(fs2d_1) # F-stat still looks okay
iv2d_1 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1 + fyear, data = notpot)
summary(iv2d_1) # similar to the main analysis: 5 pts and noisy

# Black defendants, first with the pooled instrument ...
fs2d_b1 <- lm(jail ~ crtjailavg1 + fyear, data = notpot[def_rac == "B"])
summary(fs2d_b1) # weaker
iv2d_b1 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1 + fyear, data = notpot[def_rac == "B"])
summary(iv2d_b1) # noisier, still consistent with main

# ... then with the race-specific instrument recomputed on this subset. This
# second fit replaces iv2d_b1 and is the one that goes into the table.
notpot[def_rac == "B", crtjailavg1u := mean(jail, na.rm = TRUE), by = .(crt, fyear)]
iv2d_b1 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = notpot[def_rac == "B"])
summary(iv2d_b1)

# Table A10 in SI
stargazer(
  iv2d_1, iv2d_b1,
  omit = c("fyear"),
  omit.labels = c("Year dummies"),
  label = "droppotIV",
  out = "SI_droppotcases.tex",
  title = "Main IV estimates, dropping marijuana possession charges",
  omit.stat = c("rsq", "ser"),
  covariate.labels = c("Jail"),
  dep.var.labels = c("Voted2012"),
  column.labels = c("All Defendants", "Black Defendants")
)


###############
# What can be said about compliers?
# Main IV models, refitted.
iv2 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1 + fyear, data = voter1)
summary(iv2)
iv2b <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = black)
summary(iv2b)
iv2w <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = white)
summary(iv2w)

# Binary instrument, as in Angrist & Pischke: harshjudge is 1 when the
# courtroom-year jailing rate is at or above the all-defendant median.
allmed <- median(voter1$crtjailavg1)
voter1[, harshjudge := 0]
voter1[crtjailavg1 >= allmed, harshjudge := 1]
iv2_b <- ivreg(vote2012 ~ jail + fyear | harshjudge + fyear, data = voter1)
summary(iv2_b)
iv2_b1 <- ivreg(vote2012 ~ jail + fyear | harshjudge + fyear, data = voter1[def_rac == "B"])
summary(iv2_b1)
fs2_b <- lm(jail ~ harshjudge + fyear, data = voter1)
summary(fs2_b)

# Within-race versions: the race-specific rate is compared with the same
# all-defendant median (allmed).
black[, harshjudge := 0]
black[crtjailavg1u >= allmed, harshjudge := 1]
iv2b_b <- ivreg(vote2012 ~ jail + fyear | harshjudge + fyear, data = black)
summary(iv2b_b)
fs2b_b <- lm(jail ~ harshjudge + fyear, data = black)
summary(fs2b_b)

white[, harshjudge := 0]
white[crtjailavg1u >= allmed, harshjudge := 1]
iv2w_b <- ivreg(vote2012 ~ jail + fyear | harshjudge + fyear, data = white)
summary(iv2w_b)
fs2w_b <- lm(jail ~ harshjudge + fyear, data = white)
summary(fs2w_b)

# How many compliers among Black defendants?
# http://stats.stackexchange.com/questions/135400/estimating-number-of-compliers
fs2b_b <- lm(jail ~ harshjudge + fyear, data = black)
summary(fs2b_b)
Pz1 <- sum(black$harshjudge) # number with the instrument switched on
Pd1 <- sum(black$jail)       # number treated
(coef(fs2b_b)[2] * Pz1) / Pd1

# White defendants?
fs2b_w <- lm(jail ~ harshjudge + fyear, data = white)
summary(fs2b_w)
Pz1 <- sum(white$harshjudge) # number with the instrument switched on
Pd1 <- sum(white$jail)       # number treated
(coef(fs2b_w)[2] * Pz1) / Pd1

# Characterizing compliers: ratio of a subgroup's first-stage coefficient to
# the full-sample first-stage coefficient.
# http://polmeth.wustl.edu/conferences/methods2009/resources/slides/Angrist-IVbeamerHANDOUT.pdf

# Age: are compliers older than the defendant population overall?
fs2b_old <- lm(jail ~ harshjudge + fyear, data = black[over30 == TRUE])
summary(fs2b_old)
coef(fs2b_old)[2] / coef(fs2b_b)[2] # younger
summary(black$over30)

# Sex
summary(black$male)
fs2b_male <- lm(jail ~ harshjudge + fyear, data = black[male == TRUE])
summary(fs2b_male)
coef(fs2b_male)[2] / coef(fs2b_b)[2]
fs2b_female <- lm(jail ~ harshjudge + fyear, data = black[male == FALSE])
summary(fs2b_female)
coef(fs2b_female)[2] / coef(fs2b_b)[2] # more female

# Class A charge?
fs2b_classA <- lm(jail ~ harshjudge + fyear, data = black[classA == TRUE])
summary(fs2b_classA)
coef(fs2b_classA)[2] / coef(fs2b_b)[2]

# 2008 voting? (vote2008 is 1 when vh08g1 is non-missing and positive.)
voter1[, vote2008 := as.numeric(!is.na(vh08g1) & vh08g1 > 0)]
black[, vote2008 := as.numeric(!is.na(vh08g1) & vh08g1 > 0)]

fs2b_vote08A <- lm(jail ~ harshjudge + fyear, data = black[vote2008 == TRUE])
summary(fs2b_vote08A)
coef(fs2b_vote08A)[2] / coef(fs2b_b)[2]

# Is this true for white defendants too?
white[, vote2008 := as.numeric(!is.na(vh08g1) & vh08g1 > 0)]
fs2w_vote08A <- lm(jail ~ harshjudge + fyear, data = white[vote2008 == TRUE])
summary(fs2w_vote08A)
coef(fs2w_vote08A)[2] / coef(fs2b_w)[2]
# These are relative likelihoods, so the question is whether each ratio is
# above or below 1.

# Aronow/Carnegie ICSW reweighting (assuming ignorability etc.), run
# separately for Black and white defendants with the binary harshjudge
# instrument and covariates male, yearsatfile and classA.
library(icsw)
library(foreign)
library(rgenoud)
library(gtools)
library(sem)
set.seed(02138)
numiter <- 500 # bootstrap replicates
alpha <- 0.275 # exponent in the min.prob.quantile rule 1 / N^alpha

# Black defendants: drop rows missing sex or age.
blackframe <- as.data.frame(black)[!is.na(black$male) & !is.na(black$yearsatfile), ]
covmatH <- blackframe[, c("male", "yearsatfile", "classA")]
head(covmatH)
covmatH$male <- as.numeric(covmatH$male)
N <- nrow(blackframe)

test1 <- icsw.tsls(
  D = blackframe$jail,
  X = cbind(1, blackframe$male, blackframe$yearsatfile, blackframe$classA),
  Y = blackframe$vote2012,
  Z = blackframe$harshjudge,
  W = covmatH,
  R = numiter,
  estimand = c("ATE", "ATT"),
  min.prob.quantile = 1 / (N^alpha)
)

round(test1$coefficients["D"], 2)
test1$coefficients
# Display vector of (bootstrapped) SEs
test1$coefs.se.boot

# Same for white defendants.
whiteframe <- as.data.frame(white)[!is.na(white$male) & !is.na(white$yearsatfile), ]
covmatH <- whiteframe[, c("male", "yearsatfile", "classA")]
head(covmatH)
covmatH$male <- as.numeric(covmatH$male)

# Two fixes relative to the earlier version of this call:
#  * min.prob.quantile now uses the white sample size; it previously reused N,
#    which is nrow(blackframe).
#  * R uses numiter like the Black-defendant run; it was hard-coded to 5
#    replicates, too few for a usable bootstrap SE.
# NOTE(review): the R change alters the reported SEs and the runtime -- confirm
# against the published Table A34 before relying on exact replication.
test1w <- icsw.tsls(
  D = whiteframe$jail,
  X = cbind(1, whiteframe$male, whiteframe$yearsatfile, whiteframe$classA),
  Y = whiteframe$vote2012,
  Z = whiteframe$harshjudge,
  W = covmatH,
  R = numiter,
  estimand = c("ATE", "ATT"),
  min.prob.quantile = 1 / (nrow(whiteframe)^alpha)
)

round(test1w$coefficients["D"], 2)
test1w$coefficients
# Display vector of (bootstrapped) SEs
test1w$coefs.se.boot

# Build the table by injecting the ICSW coefficients and SEs into placeholder
# lm objects.
# https://stackoverflow.com/questions/39041675/stargazer-user-supplied-coefficients-and-se
dep.var <- "Vote2012"
regressors <- c("Constant", "Male", "Age", "ClassA", "Jail")

d <- as.data.frame(matrix(rnorm(10 * 6), ncol = 6))
names(d) <- c(dep.var, regressors)
f <- as.formula(paste(dep.var, "~ 0 +", paste(regressors, collapse = "+")))
p <- lm(f, d) # fake lm object, only a placeholder: all coefs & SEs are replaced below
p1 <- lm(f, d)

# Rename the coefficients so they line up with the placeholder regressors.
# NOTE(review): this assumes icsw.tsls returns coefficients in the order
# (intercept, male, age, classA, D) -- confirm against the package output.
names(test1$coefficients) <- regressors
names(test1$coefs.se.boot) <- regressors
names(test1w$coefficients) <- regressors
names(test1w$coefs.se.boot) <- regressors

# Table A34 in SI
stargazer(
  p, p1,
  label = "ICSWboth",
  out = "ICSWIV_SI.tex",
  title = "Reweighted IV estimates (ICSW): Effect of jail on voting",
  coef = list(test1$coefficients, test1w$coefficients),
  se = list(test1$coefs.se.boot, test1w$coefs.se.boot),
  omit.stat = "all"
)

##########################################################
## try crt x year x race (or x charge) insts
table(voter1$def_rac)

# Instrument: mean jail rate within each courtroom x year x race cell.
voter1[, crtjailavg2 := mean(jail, na.rm = TRUE), by = list(crt, fyear, def_rac)]
fs2.2 <- lm(jail ~ crtjailavg2 + fyear + def_rac, data = voter1)
summary(fs2.2)
iv2.2 <- ivreg(
  vote2012 ~ jail + fyear + def_rac | crtjailavg2 + fyear + def_rac,
  data = voter1
)
summary(iv2.2)
# this gives a bigger/more precise overall est-- presumably am predicting better within-race this way.
# but also worry about individuals starting to drive their own ests in the smaller cells.

# Remove the tiny cells for other racial groups: keep only "W" and "B",
# rebuild the instrument on that subset, and refit both stages.
voter2 <- voter1[def_rac %in% c("W", "B"), ]
voter2[, crtjailavg2 := mean(jail, na.rm = TRUE), by = list(crt, fyear, def_rac)]
fs2.2 <- lm(jail ~ crtjailavg2 + fyear + def_rac, data = voter2)
summary(fs2.2)
iv2.2 <- ivreg(
  vote2012 ~ jail + fyear + def_rac | crtjailavg2 + fyear + def_rac,
  data = voter2
)
summary(iv2.2)

# Cluster the SE's.
library(multiwayvcov)

# Lookup table assigning an id (1..75) to each of the 15 courtrooms x 5 filing
# years. It is built as a typed data.frame rather than via cbind(): cbind()
# of numbers and strings coerces every column to character, and where
# as.data.frame() turns strings into factors (the default before R 4.0),
# as.numeric() then returns factor level codes instead of courtroom numbers.
courtroomyears <- c(1:75) # 15*5
crtyears <- data.frame(
  crt = as.numeric(rep(1:15, 5)),
  fyear = as.character(rep(2008:2012, each = 15)),
  crtyrnum = courtroomyears,
  stringsAsFactors = FALSE
)

# Attach the courtroom-year id to each case; dim() should show no row change.
allclust <- merge(as.data.frame(voter2), crtyears, by = c("crt", "fyear"), all.x = TRUE)
dim(allclust)
dim(voter2)

# Refit on the merged frame so the cluster vectors align with the model rows.
iv2.2 <- ivreg(
  vote2012 ~ jail + fyear + def_rac | crtjailavg2 + fyear + def_rac,
  data = allclust
)
summary(iv2.2)
vcov_cyear <- cluster.vcov(iv2.2, allclust$crtyrnum)
coeftest(iv2.2, vcov_cyear)
vcov_court <- cluster.vcov(iv2.2, allclust$crt)
iv2b.c <- coeftest(iv2.2, vcov_court)
# fs2.2 was fitted on voter2, so its clusters come from voter2.
fsvcov_race <- cluster.vcov(fs2.2, voter2$crt)
fs_race <- coeftest(fs2.2, fsvcov_race)

stargazer(
  fs2.2, iv2.2,
  omit = c("fyear"),
  omit.labels = c("Year dummies"),
  label = "raceinstIV",
  out = "SI_raceinstIV.tex",
  title = "IV estimates: Jail on voting, Courtroom x Year x Race Instrument",
  omit.stat = c("rsq", "ser"),
  covariate.labels = c("Courtroom instrument", "Jail", "White"),
  se = list(fs_race[, "Std. Error"], iv2b.c[, "Std. Error"])
) # Table A26 in SI

### charge-specific stuff: keep only charges with more than 100 cases
### (the threshold in the code is 100, not 1000)
chargetab <- sort(table(voter1$com_off_lit_1))
keep <- chargetab[chargetab > 100]
dim(keep)
voter3 <- voter1[com_off_lit_1 %in% rownames(keep), ]
dim(voter3)

# Instrument: mean jail rate within each courtroom x year x charge cell.
voter3[, crtjailavg3 := mean(jail, na.rm = TRUE), by = list(crt, fyear, com_off_lit_1)]

# All defendants.
fs2.3 <- lm(jail ~ crtjailavg3 + fyear + com_off_lit_1, data = voter3)
summary(fs2.3)
iv2.3 <- ivreg(
  vote2012 ~ jail + fyear + com_off_lit_1 | crtjailavg3 + fyear + com_off_lit_1,
  data = voter3
)
summary(iv2.3)

# White defendants only (fs2.3 / iv2.3 are overwritten).
fs2.3 <- lm(jail ~ crtjailavg3 + fyear + com_off_lit_1, data = voter3[def_rac == "W", ])
summary(fs2.3)
iv2.3 <- ivreg(
  vote2012 ~ jail + fyear + com_off_lit_1 | crtjailavg3 + fyear + com_off_lit_1,
  data = voter3[def_rac == "W", ]
)
summary(iv2.3)

# Black defendants only (fs2.3 / iv2.3 are overwritten again).
fs2.3 <- lm(jail ~ crtjailavg3 + fyear + com_off_lit_1, data = voter3[def_rac == "B", ])
summary(fs2.3)
iv2.3 <- ivreg(
  vote2012 ~ jail + fyear + com_off_lit_1 | crtjailavg3 + fyear + com_off_lit_1,
  data = voter3[def_rac == "B", ]
)
summary(iv2.3)

# Leave-one-out means of jail by charge, and by charge within courtroom.
#
# For every row this is the mean of `jail` over all OTHER rows in the same
# cell. It is computed per group as (group total - own value) / (n - 1)
# instead of re-subsetting the whole table once per row, which made the
# earlier row-by-row loop quadratic in the number of cases.

# Leave-one-out mean of x: element k is the mean of x with element k removed.
# Mirrors mean() without na.rm: a single-element group gives NaN (0/0), and
# the result is NA whenever any of the other elements is missing.
loo_mean <- function(x) {
  is_missing <- is.na(x)
  others_missing <- sum(is_missing) - is_missing
  total <- sum(x, na.rm = TRUE)
  out <- (total - ifelse(is_missing, 0, x)) / (length(x) - 1L)
  out[others_missing > 0] <- NA_real_
  out
}

voter1[, charge_all_pred := loo_mean(jail), by = com_off_lit_1] # all others with the same charge
voter1[, charge_judge_pred := loo_mean(jail), by = list(crt, com_off_lit_1)] # same charge, same courtroom

# A row whose charge (or courtroom) is missing matches no other row under
# `==`, so its leave-one-out mean is the mean of an empty set: NaN.
voter1[is.na(com_off_lit_1), `:=`(charge_all_pred = NaN, charge_judge_pred = NaN)]
voter1[is.na(crt), charge_judge_pred := NaN]

# Keep only rows with a usable all-courtroom leave-one-out mean.
voter1a <- voter1[!is.na(charge_all_pred), ]
dim(voter1a)

fs2_loo <- lm(jail ~ charge_judge_pred + com_off_lit_1, data = voter1a)
summary(fs2_loo)
iv2_loo <- ivreg(vote2012 ~ jail | charge_judge_pred, data = voter1a)
summary(iv2_loo, diagnostics = TRUE)

# Make table here, with SE's clustered by courtroom.
library(multiwayvcov)
vcov_crt <- cluster.vcov(iv2_loo, voter1a$crt)
iv_crt <- coeftest(iv2_loo, vcov_crt)

# Same specification for Black defendants only.
iv2b_loo <- ivreg(vote2012 ~ jail | charge_judge_pred, data = voter1a[def_rac == "B", ])
summary(iv2b_loo)
vcov_crtb <- cluster.vcov(iv2b_loo, voter1a[def_rac == "B", ]$crt)
ivb_crt <- coeftest(iv2b_loo, vcov_crtb)

# Make table here (drop the individual charge-type coeffs and note that in a
# row at the bottom).
# NOTE(review): the two "Yes" rows are added by hand, but neither IV formula
# above contains fyear or com_off_lit_1 -- confirm the table rows and the
# model specification agree.
stargazer(
  iv2_loo, iv2b_loo,
  omit = c("fyear", "com_off_lit_1"),
  label = "chargeinstIV",
  title = "IV estimates: Jail on voting, Courtroom x Charge Instrument (Leave-one-out means)",
  omit.stat = c("rsq", "ser"),
  covariate.labels = c("Jail"), # omit.labels=c("Year dummies", "Charge dummies"),
  add.lines = list(
    c("Year dummies", "Yes", "Yes"),
    c("Charge Dummies", "Yes", "Yes")
  ), # adding in manually since omit.labels not working
  out = "SI_chargeinstIV.tex",
  star.cutoffs = c(0.05),
  notes = "$^{*}$p$<$0.05",
  notes.append = FALSE,
  se = list(iv_crt[, "Std. Error"], ivb_crt[, "Std. Error"]),
  column.labels = c("All", "Black")
)
# Table A27 in SI.

##########################
## make main paper figure

# Pooled IV: jail instrumented by crtjailavg1, with year dummies.
iv2 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1 + fyear, data = voter1)
summary(iv2)
summary(iv2)

# Merge in the courtroom-year ids (crtyears is built earlier in the script)
# and refit on the merged frame so the cluster vector aligns with the model.
allclust <- merge(as.data.frame(voter1), crtyears, by = c("crt", "fyear"), all.x = TRUE)
dim(allclust)
dim(voter1)
iv2 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1 + fyear, data = allclust)
summary(iv2)
vcov_court.all <- cluster.vcov(iv2, allclust$crt)
iv2.c <- coeftest(iv2, vcov_court.all)

library(stats)
# Point estimate (second coefficient) and 95% interval for jail. confint() is
# called on the model itself, so the interval does not use vcov_court.all.
yearpoint <- coef(iv2)[2]
yearCI <- confint(iv2, "jail", level = .95)

ests_main <- rbind(cbind(yearpoint, yearCI))
ests_main <- ests_main * 100 # rescale to be percentage pts.

# Black/white defendant ests. iv2b and iv2w are not fitted in this section;
# they must already exist when this code runs.
bpoint <- coef(iv2b)[2]
bCI <- confint(iv2b, "jail", level = .95)
wpoint <- coef(iv2w)[2]
wCI <- confint(iv2w, "jail", level = .95)
ests_race <- rbind(cbind(bpoint, bCI), cbind(wpoint, wCI))
ests_race2 <- rbind(cbind(yearpoint, yearCI), cbind(bpoint, bCI), cbind(wpoint, wCI))
ests_race2 <- ests_race2 * 100
ests_race <- ests_race * 100

# Dot-and-whisker plot: row 1 (Black) drawn in firebrick, row 2 (White) in
# the default colour.
pdf(file = "raceIV1.pdf")
plot(
  ests_race[, 1], c(1, 2),
  ylim = c(0.5, 2.5), xlim = c(-30, 15),
  yaxt = "n", pch = 19,
  main = "IV estimates by Race: Jail on 2012 Voting",
  xlab = "", ylab = NA
)
abline(v = 0, col = "lightgray", lty = "dotted", lwd = 3)
segments(ests_race[1, 2], 1, ests_race[1, 3], 1, lwd = 3, col = "firebrick")
points(ests_race[1, 1], 1, pch = 19, col = "firebrick")
segments(ests_race[2, 2], 2, ests_race[2, 3], 2, lwd = 3)
axis(2, at = c(1, 2), labels = c("Black", "White"), cex.axis = .95, las = 1)
dev.off() # Figure 2 in paper


######################################################################
### VOTE HISTORY

# Make sure it's not that voters are getting assigned to certain courtrooms
# (with, again, concerns about post-treatment): chi-squared test of 2008
# turnout against courtroom.
vote08 <- table(voter1$vote2008, voter1$crt)
chisq.test(vote08)

# Look at voting rates by group: everyone, then black == TRUE, then
# def_rac == "W".
mean(voter1$vote2012)
mean(voter1$vote2008)
mean(voter1[black == TRUE]$vote2012)
mean(voter1[black == TRUE]$vote2008)
mean(voter1[def_rac == "W"]$vote2012)
mean(voter1[def_rac == "W"]$vote2008)
t.test(voter1[def_rac == "W"]$vote2008, voter1[def_rac == "B"]$vote2008)

# 2008 turnout regressed on race, without and with covariates.
vote2008 <- lm(vote2008 ~ black, data = voter1)
summary(vote2008)
vote2008plus <- lm(vote2008 ~ black + male + over30 + mostsevcharge, data = voter1)
summary(vote2008plus)

stargazer(
  vote2008, vote2008plus,
  title = "Differences in pre-arrest voter turnout by race",
  covariate.labels = c("Black", "Male", "Over 30", "Charge severity"),
  label = "votediffs",
  out = "prearrestvotebyrace.tex",
  align = TRUE,
  omit.stat = c("LL", "ser", "f"),
  column.labels = c("Turnout 2008", "Turnout 2008"),
  model.numbers = FALSE,
  dep.var.labels.include = FALSE,
  star.cutoffs = c(0.05),
  notes = "$^{*}$p$<$0.05",
  notes.append = FALSE
)
# Table 4 in paper


# note, could also run the placebo test of 2008 vote on jail (should be null)
# but as discussed in paper-- I worry this isn't really a pre-treatment measure. (esp. for black voters, given litigation.)
# I know there have been significant purges-- I'm missing something like 10% of 2008 voters in the 2014 file.

black <- voter1[def_rac == "B", ]
white <- voter1[def_rac == "W", ]

# Instrument recomputed within each racial subsample.
black[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]
white[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]
fviv1.1b.p <- ivreg(vote2008 ~ jail + fyear | crtjailavg1u + fyear, data = black)
summary(fviv1.1b.p)
fviv1.1w.p <- ivreg(vote2008 ~ jail + fyear | crtjailavg1u + fyear, data = white)
summary(fviv1.1w.p)

# show this test to R5. cluster SE's and make table
#
# Courtroom x filing-year lookup (ids 1..75). Built as a typed data.frame
# rather than via cbind(): cbind() of numbers and strings coerces every column
# to character, and where as.data.frame() turns strings into factors (the
# default before R 4.0), as.numeric() then returns factor level codes instead
# of courtroom numbers.
courtroomyears <- c(1:75) # 15*5
crtyears <- data.frame(
  crt = as.numeric(rep(1:15, 5)),
  fyear = as.character(rep(2008:2012, each = 15)),
  crtyrnum = courtroomyears,
  stringsAsFactors = FALSE
)

# Note: iv2b / iv2w are (re)defined here with vote2008 as the outcome.
blackclust <- merge(as.data.frame(black), crtyears, by = c("crt", "fyear"), all.x = TRUE)
iv2b <- ivreg(vote2008 ~ jail + fyear | crtjailavg1u + fyear, data = blackclust)
summary(iv2b, diagnostics = TRUE)
vcov_court <- cluster.vcov(iv2b, blackclust$crt)
iv2b.c <- coeftest(iv2b, vcov_court)

whiteclust <- merge(as.data.frame(white), crtyears, by = c("crt", "fyear"), all.x = TRUE)
dim(whiteclust)
dim(white)
iv2w <- ivreg(vote2008 ~ jail + fyear | crtjailavg1u + fyear, data = whiteclust)
summary(iv2w)
vcov_court <- cluster.vcov(iv2w, whiteclust$crt)
iv2w.c <- coeftest(iv2w, vcov_court)

# fs2b and fs2w (first-stage fits) are not created in this section; they must
# already exist when the table is built.
stargazer(
  iv2b, iv2w,
  omit = c("fyear"),
  se = list(iv2b.c[, "Std. Error"], iv2w.c[, "Std. Error"]),
  omit.labels = c("Year dummies"),
  add.lines = list(
    c("Clustered SE's", "Courtroom", "Courtroom"),
    c(
      "First Stage F-Statistic",
      round(summary(fs2b)$fstatistic[1], digits = 2),
      round(summary(fs2w)$fstatistic[1], digits = 2)
    )
  ),
  label = "simplestraceIV_08placebo",
  out = "simplestraceIV_cluster_08placebo.tex",
  title = "Placebo IV estimates: Jail sentences on voting, by race",
  omit.stat = c("rsq", "ser"),
  dep.var.labels = "Voted 2008",
  column.labels = c("Black Defendants", "White Defendants"),
  star.cutoffs = c(0.05),
  notes = "$^{*}$p$<$0.05",
  notes.append = FALSE
)
# Table A35 in SI


#################################################################
### also: look at random assignment in this sample.
# do defendant characteristics look the same across courtrooms?

voter1[, cases := 1]

# Per-courtroom shares for the SI table. Every share is divided by the total
# number of cases in the courtroom, including rows where the variable is NA.
courts <- voter1[
  ,
  list(
    totalcases = sum(cases),
    pctmale = sum(male, na.rm = TRUE) / sum(cases),
    pctblack = sum(black) / sum(cases),
    pctover30 = sum(over30, na.rm = TRUE) / sum(cases),
    pctjail = sum(jail, na.rm = TRUE) / sum(cases),
    pctvoted12 = sum(vote2012, na.rm = TRUE) / sum(cases)
  ),
  by = crt
]
setkey(courts, crt)

stargazer(
  courts,
  summary = FALSE,
  label = "courtvals",
  title = "Defendant Characteristics by Courtroom, 2008-2012",
  out = "courtvals2.tex",
  rownames = FALSE,
  covariate.labels = c(
    "Court", "Total", "Percent Male", "Percent Black",
    "Percent \\textgreater 30", "Percent Jailed", "Percent Voted 2012"
  )
)
# Table A11 in SI

# plot pre-treatment chars
voter1[, classA := 0]
voter1[mostsevcharge == 5, classA := 1]

# One summary expression, evaluated below at three groupings / key orders.
court_cell_summary <- quote(list(
  totalcases = sum(cases),
  pctmale = sum(male, na.rm = TRUE) / sum(cases),
  pctblack = sum(black) / sum(cases),
  pctover30 = sum(over30, na.rm = TRUE) / sum(cases),
  pctjail = sum(jail) / sum(cases),
  pctclassA = sum(classA) / sum(cases)
))

# Courtroom-year cells, sorted by courtroom then year.
courts <- voter1[, eval(court_cell_summary), by = list(crt, fyear)]
setkey(courts, crt, fyear)

# The same cells, sorted by year then courtroom.
courts2 <- voter1[, eval(court_cell_summary), by = list(crt, fyear)]
setkey(courts2, fyear, crt)

# also, look quickly at case volume: whole-courtroom totals.
courts3 <- voter1[, eval(court_cell_summary), by = list(crt)]
setkey(courts3, crt)

# One panel per filing year: courtroom jail rate against courtroom case
# count, with a fitted least-squares line. Uses courts2 (courtroom-year cells).
pdf("courtharshness_casevolume_byyear.pdf")
par(mfrow = c(2, 3))
yearslist <- c("2008", "2009", "2010", "2011", "2012")
for (i in seq_along(yearslist)) {
  courtsi <- courts2[fyear == yearslist[i], ]
  plot(
    courtsi$pctjail, courtsi$totalcases,
    ylab = "Total Cases",
    xlab = "Courtroom Jail Rate",
    main = yearslist[i],
    ylim = c(0, 2500)
  )
  abline(lm(courtsi$totalcases ~ courtsi$pctjail))
}
dev.off()
# Figure A12 in SI



# Courtroom-level shares computed separately for each filing year, then shown
# as boxplots across courtrooms (range = 0 extends whiskers to the extremes).
year_court_summary <- quote(list(
  totalcases = sum(cases),
  pctmale = sum(male, na.rm = TRUE) / sum(cases),
  pctblack = sum(black) / sum(cases),
  pctover30 = sum(over30, na.rm = TRUE) / sum(cases),
  pctjail = sum(jail) / sum(cases),
  pctclassA = sum(classA) / sum(cases)
))

# Draws one boxplot panel from a courtroom summary table.
court_boxplot <- function(court_tab, main) {
  boxplot(
    list(
      court_tab$pctclassA, court_tab$pctmale, court_tab$pctblack,
      court_tab$pctover30, court_tab$pctjail
    ),
    main = main,
    xaxis = 'n',
    range = 0,
    names = c("Class A", "Male", "Black", "Over 30", "Jail")
  )
}

courts09 <- voter1[fyear == "2009", eval(year_court_summary), by = list(crt, fyear)]
setkey(courts09, crt, fyear)
pdf("prepostboxplot2009.pdf")
court_boxplot(courts09, "Boxplot of Pre- and Post-Assignment Variables by Court:2009")
dev.off()

courts10 <- voter1[fyear == "2010", eval(year_court_summary), by = list(crt, fyear)]
setkey(courts10, crt, fyear)

courts11 <- voter1[fyear == "2011", eval(year_court_summary), by = list(crt, fyear)]
setkey(courts11, crt, fyear)

courts12 <- voter1[fyear == "2012", eval(year_court_summary), by = list(crt, fyear)]
setkey(courts12, crt, fyear)

## Figure A7 in SI
pdf("prepostboxplot_4year.pdf", height = 10, width = 8)
par(oma = c(0, 0, 4, 0))
par(mfrow = c(2, 2))
court_boxplot(courts09, "2009")
court_boxplot(courts10, "2010")
court_boxplot(courts11, "2011")
court_boxplot(courts12, "2012")
mtext(
  "Pre-Assignment Characteristics And Sentencing By Courtroom, \n Suggesting Random Assignment",
  outer = TRUE, cex = 1.5
)
dev.off()

# and do age permutation for SI
# For each courtroom: draw the age density under 100 random reshufflings of
# the courtroom labels (light gray), then overlay the courtroom's real age
# density.
set.seed(14609)

pdf("courtroompermutations_age.pdf", height = 11, width = 8)
par(mfrow = c(4, 4))

# Ages and courtroom labels are extracted once. Only the label vector needs
# reshuffling, so the full table is no longer deep-copied on every draw (and
# voter2 is no longer overwritten as a side effect).
age_years <- as.numeric(voter1$ageatfile) / 365
court_of_case <- voter1$crt

for (court_num in 1:15) {
  panel_title <- paste("Defendant Age: Courtroom ", as.character(court_num), sep = "")
  real_density <- density(na.omit(age_years[which(court_of_case == court_num)]))

  # Invisible (white) curve that only sets up the axes. It is drawn from this
  # panel's own courtroom so the automatic "N = ... Bandwidth = ..." axis label
  # describes the courtroom in the title (it used courtroom 1 in every panel).
  plot(
    real_density,
    xlim = c(0, 90), ylim = c(0, .07),
    main = panel_title, col = "white", cex.main = .8
  )
  for (i in 1:100) {
    shuffled_court <- sample(court_of_case, length(court_of_case)) # shuffle assignments
    points(
      density(na.omit(age_years[which(shuffled_court == court_num)])),
      type = "l", col = "lightgray"
    )
  }
  points(real_density, type = "l", cex = .7, lwd = .6)
}
dev.off() # Figure A4 in SI

###############
# also, look at court caseloads across case types

# Set up 0/1 dummies for three common charge types.
voter1[, marijuana02 := 0]
voter1[com_off_lit_1 == "POSS MARIJ 0-2 OZ", marijuana02 := 1]
voter1[, DWI := 0]
voter1[com_off_lit_1 == "DWI 1ST OFFENDER BAC .08", DWI := 1]
voter1[, familyassault := 0]
voter1[com_off_lit_1 == "ASSAULT-FAMILY MEMBER", familyassault := 1]
voter1[, cases := 1]

# Summary stats table across courts for a single year (2011).
courtcaseloads2011 <- voter1[
  fyear == 2011,
  list(
    totalcases = sum(cases),
    pctmarijuana = sum(marijuana02, na.rm = TRUE) / sum(cases),
    pctdwi = sum(DWI) / sum(cases),
    pctfamilyassault = sum(familyassault, na.rm = TRUE) / sum(cases)
  ),
  by = crt
]
setkey(courtcaseloads2011, crt)
stargazer(
  courtcaseloads2011,
  summary = FALSE,
  label = "courtvals",
  title = "Common Charge Types Across Courtrooms, 2011",
  rownames = FALSE,
  covariate.labels = c("Court", "Total", "Marijuana Possession", "DWI", "Assault on a Family Member"),
  out = "courtcasetypes2011.tex"
) # Table A12 in SI

# Plot proportions (of a given case type) for each courtroom over time.
courtcaseloads <- voter1[
  ,
  list(
    totalcases = sum(cases),
    pctmarijuana = sum(marijuana02, na.rm = TRUE) / sum(cases),
    pctdwi = sum(DWI) / sum(cases),
    pctfamilyassault = sum(familyassault, na.rm = TRUE) / sum(cases)
  ),
  by = list(crt, fyear)
]
setkey(courtcaseloads, crt, fyear)

# Each figure draws courtroom 1 with plot() and overlays courtrooms 2-15 with
# points(). Every pdf() is paired with its own dev.off(): the marijuana device
# was previously never closed, so that file was left unfinished and the device
# stayed open. The `add` argument was dropped from the points() calls because
# it is not a points() argument or a graphical parameter.

# start with pot.
court <- courtcaseloads[crt == 1, ]
pdf("courtcaseloads_marijuana.pdf")
plot(
  court$fyear, court$pctmarijuana,
  ylim = c(0, .25), type = "b",
  main = "Court Caseloads: Proportion Marijuana Possession, 2008-2012"
)
for (i in 2:15) { # add in the other courts
  court <- courtcaseloads[crt == i, ]
  points(court$fyear, court$pctmarijuana, type = "b")
}
dev.off()

# same deal for DWI and then assault.
court <- courtcaseloads[crt == 1, ]
pdf("courtcaseloads_dwi.pdf")
plot(
  court$fyear, court$pctdwi,
  ylim = c(0, .25), type = "b",
  main = "Court Caseloads: Proportion DWI, 2008-2012"
)
for (i in 2:15) { # add in the other courts
  court <- courtcaseloads[crt == i, ]
  points(court$fyear, court$pctdwi, type = "b")
}
dev.off()

court <- courtcaseloads[crt == 1, ]
pdf("courtcaseloads_familyassault.pdf")
plot(
  court$fyear, court$pctfamilyassault,
  ylim = c(0, .15), type = "b",
  main = "Court Caseloads: Proportion Assault on Family Member, 2008-2012"
)
for (i in 2:15) { # add in the other courts
  court <- courtcaseloads[crt == i, ]
  points(court$fyear, court$pctfamilyassault, type = "b")
}
dev.off()
# Figure A8 in SI


### present case assignment differently, per R3 suggestions.
voter1[, multcases := 0]
voter1[numcases > 1, multcases := 1]
# set up dummies for common charge types
voter1[, marijuana02 := 0]
voter1[com_off_lit_1 == "POSS MARIJ 0-2 OZ", marijuana02 := 1]
voter1[, DWI := 0]
voter1[com_off_lit_1 == "DWI 1ST OFFENDER BAC .08", DWI := 1]
voter1[, familyassault := 0]
voter1[com_off_lit_1 == "ASSAULT-FAMILY MEMBER", familyassault := 1]

# Courtroom-level shares (denominator: all cases in the courtroom).
courts <- voter1[
  ,
  list(
    totalcases = sum(cases),
    pctmale = sum(male, na.rm = TRUE) / sum(cases),
    pctblack = sum(black) / sum(cases),
    pctover30 = sum(over30, na.rm = TRUE) / sum(cases),
    pctjail = sum(jail) / sum(cases),
    pctclassA = sum(classA) / sum(cases),
    pctvoted08 = sum(vote2008) / sum(cases),
    pctmultcases = sum(multcases) / sum(cases),
    pctmarijuana = sum(marijuana02, na.rm = TRUE) / sum(cases),
    pctdwi = sum(DWI) / sum(cases),
    pctfamilyassault = sum(familyassault, na.rm = TRUE) / sum(cases)
  ),
  by = list(crt)
]
setkey(courts, crt)

allcourts <- courts
courts1 <- as.data.frame(allcourts)

# One panel per characteristic: courtroom jail rate against the courtroom's
# share of defendants with that characteristic, with a loess smooth.
pdf("balancescatters_courts_alldef.pdf", height = 8, width = 5.5)
par(mfrow = c(4, 2), mai = c(.5, 0.4, 0.3, 0.1), mgp = c(2, 1, 0))
chars <- c("pctmale", "pctover30", "pctblack", "pctclassA", "pctmultcases", "pctmarijuana", "pctdwi", "pctfamilyassault")
names <- c("Male", "Over 30", "Percent Black", "Class A", "Multiple Cases", "Marijuana", "DWI", "Assault on a Family Member")
for (i in seq_along(chars)) {
  vec <- courts1[, chars[i]]
  # Prefix the axis label with "Percent" unless the panel name already starts
  # with it; otherwise the third panel's x label reads "Percent Percent Black".
  label <- if (grepl("^Percent", names[i])) names[i] else paste("Percent", names[i])
  scatter.smooth(
    courts1$pctjail ~ vec,
    main = names[i], xlab = label, ylab = "Courtroom Jail Rate",
    span = 1.5, ylim = c(.45, .65)
  )
}
dev.off() # Figure 1 in main paper


# now by race for SI: the same shares, by courtroom x year x race cell
courts <- voter1[
  ,
  list(
    totalcases = sum(cases),
    pctmale = sum(male, na.rm = TRUE) / sum(cases),
    pctblack = sum(black) / sum(cases),
    pctover30 = sum(over30, na.rm = TRUE) / sum(cases),
    pctjail = sum(jail) / sum(cases),
    pctclassA = sum(classA) / sum(cases),
    pctvoted08 = sum(vote2008) / sum(cases),
    pctmultcases = sum(multcases) / sum(cases),
    pctmarijuana = sum(marijuana02, na.rm = TRUE) / sum(cases),
    pctdwi = sum(DWI) / sum(cases),
    pctfamilyassault = sum(familyassault, na.rm = TRUE) / sum(cases)
  ),
  by = list(crt, fyear, def_rac)
]

# Start with black defendants, and drop 2008 since it's so little data.
blackcourts <- courts[!(fyear == 2008) & def_rac == "B", ]
courts1 <- as.data.frame(blackcourts)

# The formula is written as courts1$pctjail ~ vec on purpose: no ylab is
# supplied, so the y-axis label is taken from the formula text.
pdf("balancescatters_courtyrs_blackdef.pdf", height = 7.5, width = 5.5)
par(mfrow = c(4, 2), mai = c(.5, 0.4, 0.3, 0.1), mgp = c(2, 1, 0))
par(mfrow = c(4, 2))
chars <- c("pctmale", "pctover30", "pctclassA", "pctmultcases", "pctmarijuana", "pctdwi", "pctfamilyassault")
names <- c("Male", "Over 30", "Class A", "Multiple Cases", "Marijuana", "DWI", "Assault on a Family Member")
for (i in seq_along(chars)) {
  vec <- courts1[, chars[i]]
  label <- paste("Percent", names[i])
  scatter.smooth(courts1$pctjail ~ vec, main = names[i], xlab = label, span = 1.75)
}
dev.off() # Figure A5 in SI

# Now white defendants, again without 2008.
whitecourts <- courts[!(fyear == 2008) & def_rac == "W", ]
courts1 <- as.data.frame(whitecourts)

pdf("balancescatters_courtyrs_whitedef.pdf", height = 7.5, width = 5.5)
par(mfrow = c(4, 2), mai = c(.5, 0.4, 0.3, 0.1), mgp = c(2, 1, 0))
par(mfrow = c(4, 2))
chars <- c("pctmale", "pctover30", "pctclassA", "pctmultcases", "pctmarijuana", "pctdwi", "pctfamilyassault")
names <- c("Male", "Over 30", "Class A", "Multiple Cases", "Marijuana", "DWI", "Assault on a Family Member")
for (i in seq_along(chars)) {
  vec <- courts1[, chars[i]]
  label <- paste("Percent", names[i])
  scatter.smooth(courts1$pctjail ~ vec, main = names[i], xlab = label, span = 1.75)
}
dev.off() # Figure A6 in SI



#################################
# do f-tests as in some other RJA papers
# Each outcome is regressed on the courtroom dummies (crt_2 .. crt_15)
# interacted with filing year; the overall F statistic of each regression
# goes into the table.

# Fits outcome ~ (crt_2 + ... + crt_15) * fyear on voter1. The formula is
# spliced into the call so the stored call shows the full formula.
fit_court_model <- function(outcome) {
  court_terms <- paste(paste0("crt_", 2:15), collapse = " + ")
  model_formula <- as.formula(
    paste0(outcome, " ~ (", court_terms, ") * fyear"),
    env = globalenv()
  )
  eval(bquote(lm(.(model_formula), data = voter1)))
}

# start with pre-treatment stuff
male <- fit_court_model("male")
summary(male)
black <- fit_court_model("black")
summary(black)
age <- fit_court_model("yearsatfile")
summary(age)

# and now court-driven stuff:
conv <- fit_court_model("anyconv")
summary(conv)
fine <- fit_court_model("fine")
summary(fine)
probation <- fit_court_model("probation")
summary(probation)
jail <- fit_court_model("jail")
summary(jail)
jailtime <- fit_court_model("sentencedays")
summary(jailtime)

modnames <- c("Male", "Black", "Age", "Conviction", "Fine", "Probation", "Jail", "Jail Time")
modftests <- c(
  summary(male)$fstatistic[1],
  summary(black)$fstatistic[1],
  summary(age)$fstatistic[1],
  summary(conv)$fstatistic[1],
  summary(fine)$fstatistic[1],
  summary(probation)$fstatistic[1],
  summary(jail)$fstatistic[1],
  summary(jailtime)$fstatistic[1]
)

ftab <- cbind(modnames, round(modftests, digits = 2))
stargazer(
  ftab,
  rownames = FALSE,
  out = "courtftests.tex",
  label = "courtftests",
  title = "Testing Court Caseload Differences"
)
# Table A9 in SI


#################################
## run analysis without each courtroom-yr sequentially.
black <- voter1[def_rac == "B", ]
white <- voter1[def_rac == "W", ]

black[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]
white[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]

# One row per courtroom x filing year (ids 1..75), plus result columns.
# Built as a typed data.frame: cbind() of numbers and strings coerces every
# column to character (factor where as.data.frame() converts strings to
# factors), which makes the later as.numeric(crtyrnum) return factor level
# codes rather than the ids.
courtroomyears <- c(1:75) # 15*5
crtyears <- data.frame(
  crt = as.numeric(rep(1:15, 5)),
  fyear = as.character(rep(2008:2012, each = 15)),
  crtyrnum = courtroomyears,
  stringsAsFactors = FALSE
)
crtyears$est <- NA
crtyears$p <- NA
crtyears$CIlow <- NA
crtyears$CIhigh <- NA

# Refit the Black-defendant IV model leaving out one courtroom-year at a time.
# The jail row is selected by name rather than by position in the flattened
# coefficient / confint matrices, so the extraction stays correct if the
# number of estimated year dummies changes. Column 4 of the summary table is
# the p-value.
for (i in seq_len(nrow(crtyears))) {
  drop <- crtyears[i, ]
  newdata <- black[!(crt == drop$crt & fyear == drop$fyear), ]
  iv2b <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = newdata)
  jail_ci <- confint(iv2b)["jail", ]
  crtyears[i, "est"] <- coef(iv2b)[["jail"]]
  crtyears[i, "p"] <- summary(iv2b)$coefficients["jail", 4]
  crtyears[i, "CIlow"] <- jail_ci[[1]]
  crtyears[i, "CIhigh"] <- jail_ci[[2]]
}
summary(crtyears$est)
summary(crtyears$p)

# and do the same thing with whole courtrooms, though things will get noisier.
courtrooms <- c(1:15)
crts <- data.frame(crt = 1:15, crtyrnum = courtrooms)
crts$est <- NA
crts$p <- NA
crts$CIlow <- NA
crts$CIhigh <- NA

for (i in seq_len(nrow(crts))) {
  drop <- crts[i, ]
  newdata <- black[!(crt == drop$crt), ]
  iv2b <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = newdata)
  jail_ci <- confint(iv2b)["jail", ]
  crts[i, "est"] <- coef(iv2b)[["jail"]]
  crts[i, "p"] <- summary(iv2b)$coefficients["jail", 4]
  crts[i, "CIlow"] <- jail_ci[[1]]
  crts[i, "CIhigh"] <- jail_ci[[2]]
}
summary(crts$est)
summary(crts$p)

# plot these: point estimate with CI per left-out unit; the blue line is drawn
# at the hard-coded value -.134 and the dotted gray line at zero.
pdf("SI_blackdef_crtyearjackknife.pdf")
crtyears$crtyrnum <- as.numeric(crtyears$crtyrnum)
plot(
  crtyears$crtyrnum, crtyears$est,
  main = "Estimated Effects of Jail on Voting for Black Defendants, \n Dropping Each Courtroom-Year",
  ylab = "Estimated Effect on 2012 Turnout",
  xlim = c(0, 76), ylim = c(-.28, .02), xlab = "Index"
)
segments(crtyears$crtyrnum, crtyears$CIlow, crtyears$crtyrnum, crtyears$CIhigh)
abline(h = -.134, col = "blue", lwd = 2)
abline(h = 0, col = "lightgray", lwd = 2, lty = 3)
dev.off() # Figure A9 in SI

pdf("SI_blackdef_crtjackknife.pdf")
crts$crtyrnum <- as.numeric(crts$crtyrnum)
plot(
  crts$crtyrnum, crts$est,
  main = "Estimated Effects of Jail on Voting for Black Defendants, \n Dropping Each Courtroom",
  ylab = "Estimated Effect on 2012 Turnout",
  xlim = c(0, 16), ylim = c(-.28, .02), xlab = "Index"
)
segments(crts$crtyrnum, crts$CIlow, crts$crtyrnum, crts$CIhigh)
abline(h = -.134, col = "blue", lwd = 2)
abline(h = 0, col = "lightgray", lwd = 2, lty = 3)
dev.off() # Figure A10 in SI

#########################################################################################################
# per reviewer comment, look at other subgroups (rule out just noise)
#
# Every subgroup follows the same recipe: subset voter1, recompute the
# courtroom-year jail rate inside the subset (crtjailavg1u), then fit the
# first stage (lm) and the IV model (ivreg) with year dummies.

# what I'm doing on race (main spec, subsetting):
black <- voter1[def_rac == "B", ]
white <- voter1[def_rac == "W", ]

black[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]
white[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]

fs2b <- lm(jail ~ crtjailavg1u + fyear, data = black)
summary(fs2b)
iv2b <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = black)
summary(iv2b)
fs2w <- lm(jail ~ crtjailavg1u + fyear, data = white)
summary(fs2w) # previously printed fs2b again instead of the white first stage
iv2w <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = white)
summary(iv2w)

# same deal for gender
men <- voter1[male == TRUE, ]
women <- voter1[male == FALSE, ]

men[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]
women[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]

fs2m <- lm(jail ~ crtjailavg1u + fyear, data = men)
summary(fs2m)
iv2m <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = men)
summary(iv2m)
fs2f <- lm(jail ~ crtjailavg1u + fyear, data = women)
summary(fs2f)
iv2f <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = women)
summary(iv2f)

# same deal for age
over30 <- voter1[over30 == TRUE, ]
under30 <- voter1[over30 == FALSE, ]

over30[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]
under30[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]

fs2o <- lm(jail ~ crtjailavg1u + fyear, data = over30)
summary(fs2o)
iv2o <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = over30)
summary(iv2o)
fs2u <- lm(jail ~ crtjailavg1u + fyear, data = under30)
summary(fs2u)
iv2u <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = under30)
summary(iv2u)

# same deal for charge severity
classA <- voter1[classA == TRUE, ]
classB <- voter1[classA == FALSE, ]

classA[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]
classB[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]

fs2A <- lm(jail ~ crtjailavg1u + fyear, data = classA)
summary(fs2A)
iv2A <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = classA)
summary(iv2A)
fs2B <- lm(jail ~ crtjailavg1u + fyear, data = classB)
summary(fs2B)
iv2B <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = classB)
summary(iv2B)

stargazer(
  iv2m, iv2f, iv2o, iv2u, iv2A, iv2B,
  omit = c("fyear"),
  omit.labels = c("Year dummies"),
  add.lines = list(c(
    "First Stage F-Statistic",
    round(summary(fs2m)$fstatistic[1], digits = 2),
    round(summary(fs2f)$fstatistic[1], digits = 2),
    round(summary(fs2o)$fstatistic[1], digits = 2),
    round(summary(fs2u)$fstatistic[1], digits = 2),
    round(summary(fs2A)$fstatistic[1], digits = 2),
    round(summary(fs2B)$fstatistic[1], digits = 2)
  )),
  label = "subgroups",
  out = "othersubgroups.tex",
  title = "IV estimates: Jail sentences on voting (other subgroups)",
  omit.stat = c("rsq", "ser"),
  dep.var.labels = "Voted 2012",
  column.labels = c("Men", "Women", "Over 30", "Under 30", "Class A", "Class B")
)
# Table A33 in SI

###################################################################
## see what happens if you just throw out some matches (for SI)
###################################################################
# For i = 1..50 percent, repeated jn times each: pick i% of the rows flagged
# registered == 1 at random, set their vote2012 to 0 (as if no match had been
# made), and re-estimate the overall and Black-defendant IV models.

set.seed(02145)
jn <- 10
n_pct <- 50
result_cols <- c("Percentdrop", "Trial", "PtEst", "p", "CIlow", "CIhigh")
# Result frames are sized from n_pct and jn so they stay in step with the loops.
estimates_overall <- as.data.frame(matrix(nrow = n_pct * jn, ncol = 6))
colnames(estimates_overall) <- result_cols
estimates_black <- as.data.frame(matrix(nrow = n_pct * jn, ncol = 6))
colnames(estimates_black) <- result_cols

for (i in seq_len(n_pct)) { # loop over percentage of matches to drop
  for (j in seq_len(jn)) { # do each percentage jn times
    row_id <- ((i - 1) * jn) + j
    estimates_overall$Percentdrop[row_id] <- estimates_black$Percentdrop[row_id] <- i
    estimates_overall$Trial[row_id] <- estimates_black$Trial[row_id] <- j

    voter2 <- copy(voter1) # bad for memory, but avoids weird linkages.
    rows <- which(voter2$registered == 1) # see which rows have been matched to voter file
    drops <- sample(rows, length(rows)*(i/100)) # sample some of them
    voter2[drops, vote2012 := 0] # set voting to 0 regardless of what it was

    # Main overall specification. The jail row is picked by name instead of by
    # position in the flattened coefficient / confint matrices; column 4 of
    # the summary table is the p-value.
    sim_fit_all <- ivreg(vote2012 ~ jail + fyear | crtjailavg1 + fyear, data = voter2)
    sim_ci_all <- confint(sim_fit_all)["jail", ]
    estimates_overall$PtEst[row_id] <- coef(sim_fit_all)[["jail"]]
    estimates_overall$p[row_id] <- summary(sim_fit_all)$coefficients["jail", 4]
    estimates_overall$CIlow[row_id] <- sim_ci_all[[1]]
    estimates_overall$CIhigh[row_id] <- sim_ci_all[[2]]

    ## subset the data, regenerate the instruments, and go.
    # Loop-local names are used so the simulation no longer overwrites the
    # global `black` / `white` subsets with perturbed data or deletes `iv2b`.
    # The white subset was built but never used here, so it is not rebuilt.
    sim_black <- voter2[def_rac == "B", ]
    sim_black[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]

    sim_fit_black <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = sim_black)
    sim_ci_black <- confint(sim_fit_black)["jail", ]
    estimates_black$PtEst[row_id] <- coef(sim_fit_black)[["jail"]]
    estimates_black$p[row_id] <- summary(sim_fit_black)$coefficients["jail", 4]
    estimates_black$CIlow[row_id] <- sim_ci_black[[1]]
    estimates_black$CIhigh[row_id] <- sim_ci_black[[2]]
    rm(sim_fit_black, sim_fit_all, sim_black, voter2)
  }
}

# plot these: one point and CI per trial; the blue lines are drawn at the
# hard-coded values -.134 (Black defendants) and -.045 (all defendants).
pdf("SI_droppingmatches_black.pdf")
plot(
  estimates_black$Percentdrop, estimates_black$PtEst,
  ylim = c(-.25, .05),
  main = "Estimated Effects of Jail on Voting for Black Defendants, \n Sensitivity to Dropped Matches",
  xlab = "Percentage of Found Matches Dropped",
  ylab = "Estimated Effect on 2012 Turnout"
)
abline(h = 0, col = "lightgray", lty = 2, lwd = 2)
segments(estimates_black$Percentdrop, estimates_black$CIlow, estimates_black$Percentdrop, estimates_black$CIhigh)
abline(h = -.134, col = "blue", lwd = 2)
dev.off()

pdf("SI_droppingmatches_all.pdf")
plot(
  estimates_overall$Percentdrop, estimates_overall$PtEst,
  main = "Estimated Effects of Jail on Voting for All Defendants, \n Sensitivity to Dropped Matches",
  xlab = "Percentage of Found Matches Dropped",
  ylab = "Estimated Effect on 2012 Turnout"
)
abline(h = 0, col = "lightgray", lty = 2, lwd = 2)
segments(estimates_overall$Percentdrop, estimates_overall$CIlow, estimates_overall$Percentdrop, estimates_overall$CIhigh)
abline(h = -.045, col = "blue", lwd = 2)
dev.off()
# Figure A2 in SI

#########################################################################################################
# quick back-of-the-envelope stuff about who's affected (talked through in SI)

# Inputs for the rough national estimate of how many people are affected.
jailadmissions12 <- 11600000 #from BJS report
jailpctblack <- .369 #BJS
jailpctconvicted <- .394 #BJS

#but how many people "convicted" are actually misdemeanants (not just felons with short sentences or awaiting transfer)?
dailyjail <- 735983 #from BJS, avg daily jail pop 2012
prisonersinjail <- 83500 #from other BJS report, daily prisoners held in local jail 
# see Table 6, p.9 here: http://felonvoting.procon.org/sourcefiles/corrrectional-populations-in-2013.pdf
# share of the jail population that is convicted, net of state prisoners held locally
jailpctmisdemeanor <- jailpctconvicted - prisonersinjail/dailyjail

# Share of cases that are first-time cases: a rough guess just from looking at
# Harris County's yearly case totals and first-time case totals. 2/3 is the
# other candidate value; .5 is the one actually used below.
firsttime_harris <- .5

# Magnitude of the LATE applied to every population count below.
late_magnitude <- .13

# NOTE(review): the .5 on admissions and the trailing *4 are not explained in
# this file; the *4 presumably scales one year to a four-year period -- confirm.
affectedpop <- (jailadmissions12*.5)*jailpctblack*jailpctmisdemeanor*firsttime_harris*4

#what if LATE generalized perfectly?
affectedpop*late_magnitude
#what if it generalized to half the affected pop?
affectedpop*.5*late_magnitude
#two-thirds? one third?
affectedpop*(2/3)*late_magnitude
affectedpop*(1/3)*late_magnitude

## different approach: just scale up Harris County to the nation?
HarrisCoSample <- 31524 #Black sample 2008-2012
HarrisCojail <- 16192

Harriscoblackpop <- 4441370*.2 #http://quickfacts.census.gov/qfd/states/48/48201.html
harriscoblackjailrt <- HarrisCojail / Harriscoblackpop #over 4year period
#multiply this by national Black population
nationwidejailed <- harriscoblackjailrt * 42000000 #https://www.census.gov/newsroom/releases/archives/facts_for_features_special_editions/cb12-ff01.html

#apply LATE
nationwidejailed*late_magnitude
#and just harris co?
HarrisCojail*late_magnitude

#########################################################################################################
## And now output some stuff for alternative estimators

#leave-one-out means
#
# loo_mean(x) returns, for every element i of x, the mean of all OTHER
# elements, i.e. mean(x[-i]), computed from the group total instead of
# re-subsetting the data once per row.
#   - a group of size one gives NaN (mean of nothing), as mean(numeric(0)) does
#   - like mean() without na.rm, the result is NA whenever any of the other
#     elements is missing; an element's own missingness does not matter
loo_mean <- function(x) {
	is_missing <- is.na(x)
	total <- sum(x, na.rm = TRUE)
	own <- ifelse(is_missing, 0, x)
	out <- (total - own) / (length(x) - 1)
	out[(sum(is_missing) - is_missing) > 0] <- NA
	out
}

# Leave-one-out jail rates for all defendants: by filing year, and by
# courtroom x filing year ("by judge").
voter1[, fyear_all_pred := loo_mean(jail), by = fyear]
voter1[, fyear_judge_pred := loo_mean(jail), by = list(crt, fyear)]
# A row whose own year (or courtroom) is missing has no comparison cell at all,
# so its leave-one-out mean is the mean of nothing.
voter1[is.na(fyear), fyear_all_pred := NaN]
voter1[is.na(crt) | is.na(fyear), fyear_judge_pred := NaN]

names(voter1)
voterout <- subset(voter1, select=c("vote2012", "jail", "fyear", "black", "def_rac", "def_sex", "ageatfile", "fyear_judge_pred", "crt", "crt_1", "crt_2", "crt_3", "crt_4", "crt_5", "crt_6", "crt_7", "crt_8", "crt_9", "crt_10", "crt_11", "crt_12", "crt_13", "crt_14", "crt_15", "crtjailavg1"))
dim(voterout)
t <- ivreg(vote2012 ~ jail + fyear | fyear_judge_pred + fyear, data=voterout); summary(t)

# Same leave-one-out means, recomputed within the Black-defendant subset only
# (overwrites the full-sample values carried over from voter1).
black <- voter1[def_rac=="B",]
black[, fyear_all_pred := loo_mean(jail), by = fyear]
black[, fyear_judge_pred := loo_mean(jail), by = list(crt, fyear)]
black[is.na(fyear), fyear_all_pred := NaN]
black[is.na(crt) | is.na(fyear), fyear_judge_pred := NaN]

# courtroom-year jail rate within the Black subset (not leave-one-out)
black[,crtjailavg1u:= mean(jail, na.rm=T), by=list(crt, fyear)]
voteroutb <- subset(black, select=c("vote2012", "jail", "fyear", "black", "def_rac", "def_sex", "ageatfile", "mostsevcharge",  "fyear_judge_pred", "crt", "crt_1", "crt_2", "crt_3", "crt_4", "crt_5", "crt_6", "crt_7", "crt_8", "crt_9", "crt_10", "crt_11", "crt_12", "crt_13", "crt_14", "crt_15", "crtjailavg1", "crtjailavg1u")) #, "crtjailavg2"))
dim(voteroutb)

# Export the Black-defendant analysis file for the Stata LIML replication.
library(foreign)
write.csv(voteroutb, file = "voteroutb_limltest_june15.csv")
#can now go run "stata_LIML_replication.do" to generate the SI table with alternative IV estimators

#make table with leave-one-out means here

# Baseline: courtroom-year jail rate (crtjailavg1u) as the instrument.
fs2b <- lm(jail ~ crtjailavg1u + fyear, data = voteroutb)
summary(fs2b)
iv2b <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear,
	data = voteroutb)
summary(iv2b)

# Same models with the leave-one-out mean (fyear_judge_pred) as the instrument.
fs2b_loo <- lm(jail ~ fyear_judge_pred + fyear, data = voteroutb)
summary(fs2b_loo)
iv2b_loo <- ivreg(vote2012 ~ jail + fyear | fyear_judge_pred + fyear,
	data = voteroutb)
summary(iv2b_loo)

# Male indicator; rows whose def_sex is neither "M" nor "F" stay NA.
voteroutb[def_sex == "M", male := 1]
voteroutb[def_sex == "F", male := 0]

# Leave-one-out IV with controls for sex and charge severity.
iv2b_loo.c <- ivreg(
	vote2012 ~ jail + fyear + male + mostsevcharge |
		fyear_judge_pred + fyear + male + mostsevcharge,
	data = voteroutb)
summary(iv2b_loo.c)

#also limit to more plausible ages
# (ageatfile is compared against 18 and 60 years expressed in days)
iv2b_loo.a <- ivreg(vote2012 ~ jail + fyear | fyear_judge_pred + fyear,
	data = voteroutb[ageatfile >= (18*365) & ageatfile <= (60*365),])
summary(iv2b_loo.a)

iv2b_loo.ca <- ivreg(
	vote2012 ~ jail + fyear + male + mostsevcharge |
		fyear_judge_pred + fyear + male + mostsevcharge,
	data = voteroutb[ageatfile >= (18*365) & ageatfile <= (60*365),])
summary(iv2b_loo.ca)


# Table A25 in SI (iv2b_loo.a is fitted above but not included in the table).
stargazer(iv2b_loo, iv2b_loo.c, iv2b_loo.ca,
	omit = c("fyear"), omit.labels = c("Year dummies"),
	label = "LeaveOneOutIV", out = "SI_LOO_IV.tex",
	title = "IV estimates (Leave-One-Out Means): Black Defendants",
	omit.stat = c("rsq", "ser"),
	covariate.labels = c("Jail", "Male", "Charge Severity")) #Table A25 in SI

#########################################################################################################
## Look at homeownership for SI
# Split the sample on the homeownership flag and estimate the jail effect
# separately in each half, with and without covariates.
homeowners <- voter1[homeownership == 1, ]
nonhomeowners <- voter1[homeownership == 0, ]
dim(homeowners)
dim(nonhomeowners) #the omitted rows are people outside Harris County 

# Homeowners: first stage, then IV without and with controls.
fs2_m1 <- lm(jail ~ crtjailavg1 + fyear, data = homeowners)
summary(fs2_m1)
iv2_m1 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1 + fyear,
	data = homeowners)
summary(iv2_m1)
iv2_m2 <- ivreg(
	vote2012 ~ jail + fyear + male + mostsevcharge + ageatfile + black |
		crtjailavg1 + fyear + male + mostsevcharge + ageatfile + black,
	data = homeowners)
summary(iv2_m2)

# Non-homeowners: same two IV specifications.
iv2_n1 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1 + fyear,
	data = nonhomeowners)
summary(iv2_n1) #smaller
iv2_n2 <- ivreg(
	vote2012 ~ jail + fyear + male + mostsevcharge + ageatfile + black |
		crtjailavg1 + fyear + male + mostsevcharge + ageatfile + black,
	data = nonhomeowners)
summary(iv2_n2)

# Pooled model with a jail x homeownership interaction, instrumented by the
# courtroom jail rate and its interaction with homeownership.
iv2.i <- ivreg(
	vote2012 ~ jail + fyear + homeownership + jail*homeownership |
		crtjailavg1 + fyear + homeownership + crtjailavg1*homeownership,
	data = voter1)
summary(iv2.i) #interaction

#Table A3 in the SI
stargazer(iv2_m1, iv2_m2, iv2_n1, iv2_n2,
	omit = c("fyear"), omit.labels = c("Year dummies"),
	label = "homeownersIV", out = "homeownersIV.tex",
	title = "IV estimates: Jail sentences on voting, by homeownership",
	covariate.labels = c("Jail", "Male", "Charge Severity", "Age at Filing", "Black"),
	omit.stat = c("rsq", "ser"),
	dep.var.labels = "Voted 2012",
	column.labels = c("Homeowners","Homeowners", "Others", "Others"))


#########################################################################################################
## Next: look at Spanish surnames for SI

#for sake of non-overlapping subsets, limit to W
# Latino subset: White-coded defendants flagged as having a Spanish surname.
# The instrument is recomputed within the subset.
hispanic <- voter1[spanishsurname == TRUE & def_rac == "W", ]
hispanic[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]
fs2h <- lm(jail ~ crtjailavg1u + fyear, data = hispanic)
summary(fs2h)
iv2h <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = hispanic)
summary(iv2h)

# Anglo subset: White-coded defendants whose surname flag is missing. This has
# to run before the missing flags are recoded to 0 further down.
anglo <- voter1[def_rac == "W" & is.na(spanishsurname), ]
anglo[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]
fs2a <- lm(jail ~ crtjailavg1u + fyear, data = anglo)
summary(fs2a)
iv2a <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = anglo)
summary(iv2a)

stargazer(fs2h, iv2h, fs2a, iv2a,
	omit = c("fyear"), omit.labels = c("Year dummies"),
	label = "hispanicIV", out = "SI_hispanicIV.tex",
	title = "IV estimates: Jail sentences on voting, Latino (Columns 1-2) and Anglo (Columns 3-4) defendants",
	omit.stat = c("rsq", "ser", "adj.rsq"),
	covariate.labels = c("Courtroom instrument", "Jail")) #table A32

## test diff
# Recode the missing surname flag to 0 (in place, on voter1) so it can enter
# the interaction models below.
voter1[is.na(spanishsurname), spanishsurname := 0]
fs2.i <- lm(jail ~ crtjailavg1*fyear*spanishsurname,
	data = voter1[def_rac=="W"])
iv2.i <- ivreg(
	vote2012 ~ jail + fyear + spanishsurname + jail*spanishsurname |
		crtjailavg1 + fyear + spanishsurname + crtjailavg1*spanishsurname,
	data = voter1[def_rac=="W"])
summary(iv2.i)
#looks quite similar.
table(voter1[def_rac=="W"]$spanishsurname)

# The reported F-statistic is the overall F of the fs2.i regression.
stargazer(iv2.i,
	omit.stat = c("rsq", "ser"),
	omit = c("fyear"), omit.labels = c("Year dummies"),
	covariate.labels = c("Jail", "Hispanic", "Jail x Hispanic"),
	label = "hispanicinteractionIV", out = "SI_hispanicinteractionIV.tex",
	title = "Jail's Effect on Voting for Hispanic Defendants (Interaction Model)",
	add.lines = list(c("First Stage F-Statistic", round(summary(fs2.i)$fstatistic[1], digits=2))))
#Table A24 in SI


#########################################################################################################
## Next: look at jail vs. conviction iterated subsets (non-focal treatments concern) for SI

# One row per courtroom x filing year: case count plus the share of cases
# ending in jail, probation, a fine, or any conviction.
voter1[, cases := 1]
courtstable <- voter1[, {
	ncases <- sum(cases)
	list(totalcases = ncases,
		jailrate = sum(jail) / ncases,
		probationrate = sum(probation) / ncases,
		finerate = sum(fine) / ncases,
		convrate = sum(anyconv) / ncases)
}, by = list(crt, fyear)]
# Keying sorts the table by conviction rate, so adjacent rows are
# courtroom-years with similar conviction rates.
setkey(courtstable, convrate)
courtstable
courtstable
dim(courtstable)
library(stats)

# Black defendants, with the courtroom-year jail rate recomputed in-sample.
blackvoter1 <- voter1[def_rac == "B", ]
blackvoter1[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]

# Rolling-window check, windows of 10 courtroom-years.
# courtstable is keyed by convrate, so each window of consecutive rows is a
# set of courtroom-years with similar conviction rates. For every start row
# the first stage and the 2SLS model are re-estimated on the Black defendants
# whose (crt, fyear) falls inside the window.
v10 <- c(1:51) # window start rows
width10 <- 10  # courtroom-years per window
# NOTE(review): 51 start rows x width 10 ends exactly at row 60, so this
# assumes courtstable has 60 rows -- confirm with dim(courtstable).
blackdf10 <- data.frame(matrix(NA, ncol=9, nrow=length(v10)))
colnames(blackdf10) <- c("startrow", "minconv", "maxconv", "convrange", "FS_Fstat", "ivcoef", "ivpval", "ivleft95", "ivright95")
for (i in seq_along(v10)){
	blackdf10[i,"startrow"] <- v10[i]
	# exactly width10 rows: start, start+1, ..., start+width10-1
	chunk1 <- courtstable[v10[i]:(v10[i] + width10 - 1),] #grab crt/yr values for this subset
	# Flag defendants in any of the window's courtroom-years. A logical mask is
	# used so that blackvoter1 itself is never modified.
	in_window <- rep(FALSE, nrow(blackvoter1))
	for (j in seq_len(nrow(chunk1))){
		in_window <- in_window | (blackvoter1$crt == chunk1$crt[j] & blackvoter1$fyear == chunk1$fyear[j])
	}
	voter_chunk1 <- blackvoter1[which(in_window),] #and keep only obs in this window.
	#also want to store the range of conviction rates in this subset.
	convsummary <- summary(voter_chunk1$crtconvrate1) # element 1 = min, element 6 = max
	blackdf10[i,"minconv"] <- convsummary[1]
	blackdf10[i,"maxconv"] <- convsummary[6]
	blackdf10[i,"convrange"] <- convsummary[6] - convsummary[1]
	fs2.c <- lm(jail ~ crtjailavg1u + fyear+ mostsevcharge, data=voter_chunk1) #then run FS
	blackdf10[i,"FS_Fstat"] <- summary(fs2.c)$fstatistic[1]
	#okay and then iv (will ignore if weak instruments)
	iv2.c <- ivreg(vote2012 ~ jail +fyear + mostsevcharge| crtjailavg1u +fyear + mostsevcharge, data = voter_chunk1)
	blackdf10[i,"ivcoef"] <- coef(summary(iv2.c))["jail","Estimate"]
	blackdf10[i,"ivpval"] <- coef(summary(iv2.c))["jail","Pr(>|t|)"]
	blackdf10[i,"ivleft95"] <- confint(iv2.c, "jail")[1]
	blackdf10[i,"ivright95"] <- confint(iv2.c, "jail")[2]
}

# keep windows whose first-stage F exceeds 10, ordered by conviction-rate range
blackdf10keep <- blackdf10[blackdf10$FS_Fstat>10,]
blackdf10keep_sort <- blackdf10keep[with(blackdf10keep, order(convrange)), ]

#plot(blackdf10keep_sort$convrange, blackdf10keep_sort$ivcoef, ylim=c(-1, .4),xlim=c(0,.065),
#	main = "2SLS estimates for Black defendants, subsets of 10 courtroom-years",
#	xlab = "Range of conviction rates", ylab = "Estimated Treatment Effect of Jail on Voting")
#segments( blackdf10keep_sort$convrange, blackdf10keep_sort$ivleft95,  blackdf10keep_sort$convrange,blackdf10keep_sort$ivright95)
#abline(h=0, lty=2)

##same plot for 15-unit subset
# Rolling windows of 15 consecutive rows of courtstable; for each start row
# the first stage and 2SLS model are re-estimated on the Black defendants in
# those courtroom-years.
v15 <- c(1:46) # window start rows
width15 <- 15  # courtroom-years per window
# NOTE(review): 46 start rows x width 15 ends exactly at row 60, so this
# assumes courtstable has 60 rows -- confirm with dim(courtstable).
blackdf15 <- data.frame(matrix(NA, ncol=9, nrow=length(v15)))
colnames(blackdf15) <- c("startrow", "minconv", "maxconv", "convrange", "FS_Fstat", "ivcoef", "ivpval", "ivleft95", "ivright95")
for (i in seq_along(v15)){
	blackdf15[i,"startrow"] <- v15[i]
	# exactly width15 rows: start, start+1, ..., start+width15-1
	chunk1 <- courtstable[v15[i]:(v15[i] + width15 - 1),] #grab crt/yr values for this subset
	# Flag defendants in any of the window's courtroom-years. A logical mask is
	# used so that blackvoter1 itself is never modified.
	in_window <- rep(FALSE, nrow(blackvoter1))
	for (j in seq_len(nrow(chunk1))){
		in_window <- in_window | (blackvoter1$crt == chunk1$crt[j] & blackvoter1$fyear == chunk1$fyear[j])
	}
	voter_chunk1 <- blackvoter1[which(in_window),] #and keep only obs in this window.
	#also want to store the range of conviction rates in this subset.
	convsummary <- summary(voter_chunk1$crtconvrate1) # element 1 = min, element 6 = max
	blackdf15[i,"minconv"] <- convsummary[1]
	blackdf15[i,"maxconv"] <- convsummary[6]
	blackdf15[i,"convrange"] <- convsummary[6] - convsummary[1]
	fs2.c <- lm(jail ~ crtjailavg1u + fyear+ mostsevcharge, data=voter_chunk1) #then run FS
	blackdf15[i,"FS_Fstat"] <- summary(fs2.c)$fstatistic[1]
	#okay and then iv (will ignore if weak instruments)
	iv2.c <- ivreg(vote2012 ~ jail +fyear + mostsevcharge| crtjailavg1u +fyear + mostsevcharge, data = voter_chunk1)
	blackdf15[i,"ivcoef"] <- coef(summary(iv2.c))["jail","Estimate"]
	blackdf15[i,"ivpval"] <- coef(summary(iv2.c))["jail","Pr(>|t|)"]
	blackdf15[i,"ivleft95"] <- confint(iv2.c, "jail")[1]
	blackdf15[i,"ivright95"] <- confint(iv2.c, "jail")[2]
}

# keep windows whose first-stage F exceeds 10, ordered by conviction-rate range
blackdf15keep <- blackdf15[blackdf15$FS_Fstat>10,]
blackdf15keep_sort <- blackdf15keep[with(blackdf15keep, order(convrange)), ]

#plot(blackdf15keep_sort$convrange, blackdf15keep_sort$ivcoef, ylim=c(-1, .4),xlim=c(0,.065),
#	main = "2SLS estimates for Black defendants, subsets of 15 courtroom-years",
#	xlab = "Range of conviction rates", ylab = "Estimated Treatment Effect of Jail on Voting")
#segments( blackdf15keep_sort$convrange, blackdf15keep_sort$ivleft95,  blackdf15keep_sort$convrange,blackdf15keep_sort$ivright95)
#abline(h=0, lty=2)

##same plot for 20-unit subset
# Rolling windows of 20 consecutive rows of courtstable; for each start row
# the first stage and 2SLS model are re-estimated on the Black defendants in
# those courtroom-years.
v20 <- c(1:41) # window start rows
width20 <- 20  # courtroom-years per window
# NOTE(review): 41 start rows x width 20 ends exactly at row 60, so this
# assumes courtstable has 60 rows -- confirm with dim(courtstable).
blackdf20 <- data.frame(matrix(NA, ncol=9, nrow=length(v20)))
colnames(blackdf20) <- c("startrow", "minconv", "maxconv", "convrange", "FS_Fstat", "ivcoef", "ivpval", "ivleft95", "ivright95")
for (i in seq_along(v20)){
	blackdf20[i,"startrow"] <- v20[i]
	# exactly width20 rows: start, start+1, ..., start+width20-1
	chunk1 <- courtstable[v20[i]:(v20[i] + width20 - 1),] #grab crt/yr values for this subset
	# Flag defendants in any of the window's courtroom-years. A logical mask is
	# used so that blackvoter1 itself is never modified.
	in_window <- rep(FALSE, nrow(blackvoter1))
	for (j in seq_len(nrow(chunk1))){
		in_window <- in_window | (blackvoter1$crt == chunk1$crt[j] & blackvoter1$fyear == chunk1$fyear[j])
	}
	voter_chunk1 <- blackvoter1[which(in_window),] #and keep only obs in this window.
	#also want to store the range of conviction rates in this subset.
	convsummary <- summary(voter_chunk1$crtconvrate1) # element 1 = min, element 6 = max
	blackdf20[i,"minconv"] <- convsummary[1]
	blackdf20[i,"maxconv"] <- convsummary[6]
	blackdf20[i,"convrange"] <- convsummary[6] - convsummary[1]
	fs2.c <- lm(jail ~ crtjailavg1u + fyear+ mostsevcharge, data=voter_chunk1) #then run FS
	blackdf20[i,"FS_Fstat"] <- summary(fs2.c)$fstatistic[1]
	#okay and then iv (will ignore if weak instruments)
	iv2.c <- ivreg(vote2012 ~ jail +fyear + mostsevcharge| crtjailavg1u +fyear + mostsevcharge, data = voter_chunk1)
	blackdf20[i,"ivcoef"] <- coef(summary(iv2.c))["jail","Estimate"]
	blackdf20[i,"ivpval"] <- coef(summary(iv2.c))["jail","Pr(>|t|)"]
	blackdf20[i,"ivleft95"] <- confint(iv2.c, "jail")[1]
	blackdf20[i,"ivright95"] <- confint(iv2.c, "jail")[2]
}

# keep windows whose first-stage F exceeds 10, ordered by conviction-rate range
blackdf20keep <- blackdf20[blackdf20$FS_Fstat>10,]
blackdf20keep_sort <- blackdf20keep[with(blackdf20keep, order(convrange)), ]

#plot(blackdf20keep_sort$convrange, blackdf20keep_sort$ivcoef, ylim=c(-1, .4),xlim=c(0,.065),
#	main = "2SLS estimates for Black defendants, subsets of 20 courtroom-years",
#	xlab = "Range of conviction rates", ylab = "Estimated Treatment Effect of Jail on Voting")
#segments( blackdf20keep_sort$convrange, blackdf20keep_sort$ivleft95,  blackdf20keep_sort$convrange,blackdf20keep_sort$ivright95)
#abline(h=0, lty=2)


#stick 10, 15, 20 into one plot
# Three side-by-side panels, one per window size. Each plots the 2SLS jail
# coefficient against the first-stage F-statistic, with vertical bars from
# ivleft95 to ivright95 and a dashed line at zero.
pdf("conviction_subsets_101520_byfstat.pdf", height = 6, width = 12)
par(mfrow = c(1, 3))

panels <- list(
	"10" = blackdf10keep_sort,
	"15" = blackdf15keep_sort,
	"20" = blackdf20keep_sort)
for (size in names(panels)) {
	est <- panels[[size]]
	plot(est$FS_Fstat, est$ivcoef,
		ylim = c(-1, .4), xlim = c(0, 40),
		main = paste0("Subsets of ", size, " courtroom-years"),
		xlab = "First-stage F-statistic",
		ylab = "Estimated Treatment Effect of Jail on Voting")
	segments(est$FS_Fstat, est$ivleft95, est$FS_Fstat, est$ivright95)
	abline(h = 0, lty = 2)
}
dev.off()
#Figure A11 in SI


#########################################################################################################
## Next: pull in the extra-large dataset (2000-on) to look at effect persistence for SI.

load("defendants_voter1_manyyears_deidentified.Rdata") #voter1long
# cases dated strictly before 2012-11-06
voter1l <- voter1long[casedate < "2012-11-06",]

# Filing years that make up the "early" (pre-2009) sample, defined once and
# reused for every early-period model below.
early_years <- as.character(2000:2008)
voter1l_early <- voter1l[fyear %in% early_years,]

# All defendants: full period, then early years only.
viv1.1 <- ivreg(vote2012 ~ jail + fyear| crtjailavg1 + fyear, data=voter1l); summary(viv1.1)
stargazer(viv1.1)

viv1.2 <- ivreg(vote2012 ~ jail + fyear| crtjailavg1 + fyear, data=voter1l_early); summary(viv1.2)

# NOTE(review): these two first stages omit fyear, unlike f2b/f2bearly below
# and unlike the IV models they are reported with -- confirm this is intended.
f1.1 <- lm(jail ~ crtjailavg1, data=voter1l); summary(f1.1) 
f1.2 <- lm(jail ~ crtjailavg1, data=voter1l_early); summary(f1.2) 

# Black defendants: the instrument is recomputed on the full Black sample
# first, and the early subset is taken afterwards so it carries that column.
black <- voter1l[def_rac=="B",]
black[,crtjailavg1u:= mean(jail, na.rm=T), by=list(crt, fyear)]
black_early <- black[fyear %in% early_years,]

iv2b <- ivreg(vote2012 ~ jail +fyear | crtjailavg1u +fyear, data = black); summary(iv2b)

iv2bearly <- ivreg(vote2012 ~ jail +fyear | crtjailavg1u +fyear, data = black_early); summary(iv2bearly) 

f2b <- lm(jail ~ fyear + crtjailavg1u , data = black)
f2bearly <- lm(jail ~ fyear + crtjailavg1u , data = black_early)

# The reported "First Stage F-Statistic" is the overall F of each lm above.
stargazer(viv1.1, viv1.2, iv2b, iv2bearly , 
	omit=c("fyear"), omit.labels=c("Year dummies"), 
	add.lines=list(c("First Stage F-Statistic", round(summary(f1.1)$fstatistic[1], digits=2),  round(summary(f1.2)$fstatistic[1], digits=2),  round(summary(f2b)$fstatistic[1], digits=2), round(summary(f2bearly)$fstatistic[1], digits=2)), c("2009-2012 data included", "Yes", "No", "Yes", "No")), 
	label="overtimeIV", out="overtimeIV.tex",
	title="IV estimates: Jail sentences on voting, 2000-2012", 
	omit.stat=c("rsq", "ser"), dep.var.labels   = "Voted 2012",covariate.labels=c("Jail", "Constant"),
	column.labels = c("All defendants", "Black defendants"), column.separate = c(2, 2), star.cutoffs=c(0.05), notes="$^{*}$p$<$0.05", notes.append=FALSE) ##Table A31 in the SI.

#########################################################################################################
## Similarly, use the extended-time dataset to look at the post-2012 cases placebo from SI Section 1.2

# Placebo sample: cases dated strictly after 2012-11-06, so the jail sentence
# comes after the 2012 vote it is used to "predict".
postelection <- voter1long[casedate > "2012-11-06", ]

# All defendants
fs2 <- lm(jail ~ crtjailavg1 + fyear, data = postelection)
summary(fs2)
iv2 <- ivreg(vote2012 ~ jail + fyear | crtjailavg1 + fyear,
	data = postelection)
summary(iv2)

##actually-- subset the data, regenerate the instruments, and go.
black <- postelection[def_rac == "B", ]
white <- postelection[def_rac == "W", ]

# courtroom-year jail rate recomputed within each racial subset
black[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]
white[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt, fyear)]

# Black defendants
fs2b <- lm(jail ~ crtjailavg1u + fyear, data = black)
summary(fs2b)
iv2b <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = black)
summary(iv2b)

# White defendants
fs2w <- lm(jail ~ crtjailavg1u + fyear, data = white)
summary(fs2w)
iv2w <- ivreg(vote2012 ~ jail + fyear | crtjailavg1u + fyear, data = white)
summary(iv2w)

## make this table
# The reported F-statistic is the overall F of each first-stage lm.
stargazer(iv2, iv2b, iv2w,
	omit = c("fyear"), omit.labels = c("Year dummies"),
	add.lines = list(c("First Stage F-Statistic",
		round(summary(fs2)$fstatistic[1], digits=2),
		round(summary(fs2b)$fstatistic[1], digits=2),
		round(summary(fs2w)$fstatistic[1], digits=2))),
	label = "placeboraceIV", out = "placeboraceIV1.tex",
	covariate.labels = c("Jail", "Constant"),
	title = "Placebo IV estimates: Jail on pre-arrest voting",
	omit.stat = c("rsq", "ser"),
	column.labels = c("All Defendants","Black Defendants", "White Defendants"),
	star.cutoffs = c(0.05), notes = "$^{*}$p$<$0.05", notes.append = FALSE)
##Table A2 in SI



#########################################################################################################
#now, load in deidentified dataset with neighborhood pov. rates merged in
load("defendants_join2d_deidentified.Rdata")
dim(join2d)
summary(join2d$povrate)

# Histogram of tract poverty rates, with the bar heights rescaled from counts
# to the percentage of all binned observations before re-plotting.
h <- hist(join2d$povrate)
h$density <- 100 * h$counts / sum(h$counts)
plot(h,
	freq = FALSE,
	main = "Tract Poverty Rate, all defendants",
	cex.main = .8,
	ylab = "Percentage (of total sample)")

#look at whether we see different effects in more/less poor places for R2. 
blackjoin <- join2d[black == TRUE, ] #starting to have relatively few people here
# courtroom-year jail rate recomputed within the Black subset
blackjoin[, crtjailavg1u := mean(jail, na.rm = TRUE), by = list(crt.x, fyear.x)]

#what I think the reviewer was looking for was higher/lower-income areas. split by median poverty rate? a little worried about power here.
povmedian <- summary(blackjoin$povrate)[3] # third element of summary() is the median
# Both comparisons are strict, so rows exactly at the median (and rows with a
# missing povrate) fall in neither subset.
blackhighpov <- blackjoin[povrate > povmedian, ]
blacklowpov <- blackjoin[povrate < povmedian, ]

# Below-median poverty
fs2b_low <- lm(jail ~ crtjailavg1u + fyear.x, data = blacklowpov)
summary(fs2b_low)
iv2b_low <- ivreg(vote2012 ~ jail + fyear.x | crtjailavg1u + fyear.x,
	data = blacklowpov)
summary(iv2b_low)

# Above-median poverty
fs2b_high <- lm(jail ~ crtjailavg1u + fyear.x, data = blackhighpov)
summary(fs2b_high)
iv2b_high <- ivreg(vote2012 ~ jail + fyear.x | crtjailavg1u + fyear.x,
	data = blackhighpov)
summary(iv2b_high)

#make a table here.
stargazer(iv2b_low, iv2b_high,
	omit = c("fyear"), omit.labels = c("Year dummies"),
	label = "highlowpovIV", out = "SI_highlowpovIV.tex",
	title = "IV estimates: Jail sentences on voting (Black defendants)",
	omit.stat = c("rsq", "ser"),
	covariate.labels = c("Jail"),
	column.labels = c("Below-median Poverty", "Above-median Poverty"))
#Table A4 in SI

#interaction (can we tell these apart?)
# highpov is 1 when the tract poverty rate is above the median, 0 when it is at
# or below it, and NA when povrate is missing, so defendants without a poverty
# rate are not counted as low-poverty.
# NOTE(review): rows exactly at the median are coded 0 here but are excluded
# from both split samples (which use strict > and <) -- confirm which is intended.
blackjoin[, highpov := as.numeric(povrate > povmedian)]
iv2.int <- ivreg(vote2012 ~ jail +fyear.x + highpov + jail*highpov| crtjailavg1u +fyear.x + highpov +crtjailavg1u*highpov, data = blackjoin); summary(iv2.int) #not really.


